diff --git a/.gitignore b/.gitignore
index 2fea1022910..9480c2618bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,9 +78,6 @@ datasets/*
!datasets/karate-disjoint.csv
!datasets/netscience.csv
-# nx-cugraph side effects
-python/nx-cugraph/objects.inv
-
.pydevproject
# Jupyter Notebooks
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b5fbcf9ad42..4bb037b5fda 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,7 +19,6 @@ repos:
language_version: python3
args: [--target-version=py310]
files: ^(python/.*|benchmarks/.*)$
- exclude: ^python/nx-cugraph/
- repo: https://github.com/PyCQA/flake8
rev: 7.1.1
hooks:
@@ -59,23 +58,3 @@ repos:
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- - repo: local
- hooks:
- - id: nx-cugraph-meta-data-update
- name: nx-cugraph meta-data updater
- entry: bash -c "PYTHONPATH=./python/nx-cugraph python ./python/nx-cugraph/_nx_cugraph/__init__.py"
- files: ^python/nx-cugraph/
- types: [python]
- language: python
- pass_filenames: false
- additional_dependencies: ["networkx>=3.4"]
- - repo: local
- hooks:
- - id: nx-cugraph-readme-update
- name: nx-cugraph README updater
- entry: bash -c "PYTHONPATH=./python/nx-cugraph python ./python/nx-cugraph/scripts/update_readme.py ./python/nx-cugraph/README.md"
- files: ^python/nx-cugraph/
- types_or: [python, markdown]
- language: python
- pass_filenames: false
- additional_dependencies: ["networkx>=3.4"]
diff --git a/README.md b/README.md
index 8026e4feb64..e41caec17b0 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@
-----
## News
-___NEW!___ _[nx-cugraph](./python/nx-cugraph/README.md)_, a NetworkX backend that provides GPU acceleration to NetworkX with zero code change.
+___NEW!___ _[nx-cugraph](https://rapids.ai/nx-cugraph/)_, a NetworkX backend that provides GPU acceleration to NetworkX with zero code change.
```
> pip install nx-cugraph-cu11 --extra-index-url https://pypi.nvidia.com
> export NETWORKX_AUTOMATIC_BACKENDS=cugraph
@@ -62,7 +62,7 @@ That's it. NetworkX now leverages cuGraph for accelerated graph algorithms.
- [External Data Types](./readme_pages/data_types.md)
- [pylibcugraph](./readme_pages/pylibcugraph.md)
- [libcugraph (C/C++/CUDA)](./readme_pages/libcugraph.md)
- - [nx-cugraph](./python/nx-cugraph/README.md)
+ - [nx-cugraph](https://rapids.ai/nx-cugraph/)
- [cugraph-service](./readme_pages/cugraph_service.md)
- [cugraph-dgl](./readme_pages/cugraph_dgl.md)
- [cugraph-ops](./readme_pages/cugraph_ops.md)
@@ -127,7 +127,7 @@ df_page.sort_values('pagerank', ascending=False).head(10)
* ArangoDB - a free and open-source native multi-model database system - https://www.arangodb.com/
* CuPy - "NumPy/SciPy-compatible Array Library for GPU-accelerated Computing with Python" - https://cupy.dev/
* Memgraph - In-memory Graph database - https://memgraph.com/
-* NetworkX (via [nx-cugraph](./python/nx-cugraph/README.md) backend) - an extremely popular, free and open-source package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks - https://networkx.org/
+* NetworkX (via [nx-cugraph](https://rapids.ai/nx-cugraph/) backend) - an extremely popular, free and open-source package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks - https://networkx.org/
* PyGraphistry - free and open-source GPU graph ETL, AI, and visualization, including native RAPIDS & cuGraph support - http://github.com/graphistry/pygraphistry
* ScanPy - a scalable toolkit for analyzing single-cell gene expression data - https://scanpy.readthedocs.io/en/stable/
diff --git a/benchmarks/nx-cugraph/pytest-based/README.md b/benchmarks/nx-cugraph/pytest-based/README.md
deleted file mode 100644
index 414a22171a0..00000000000
--- a/benchmarks/nx-cugraph/pytest-based/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-## `nx-cugraph` Benchmarks
-
-### Overview
-
-This directory contains a set of scripts designed to benchmark NetworkX with the `nx-cugraph` backend and deliver a report that summarizes the speed-up and runtime deltas over default NetworkX.
-
-Our current benchmarks provide the following datasets:
-
-| Dataset | Nodes | Edges | Directed |
-| -------- | ------- | ------- | ------- |
-| netscience | 1,461 | 5,484 | Yes |
-| email-Eu-core | 1,005 | 25,571 | Yes |
-| amazon0302 | 262,111 | 1,234,877 | Yes |
-| cit-Patents | 3,774,768 | 16,518,948 | Yes |
-| hollywood | 1,139,905 | 57,515,616 | No |
-| soc-LiveJournal1 | 4,847,571 | 68,993,773 | Yes |
-
-
-
-### Scripts
-
-#### 1. `run-main-benchmarks.sh`
-This script allows users to run a small set of commonly-used algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark.
-
-NOTE:
- - If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.
- - The `betweenness_centrality` benchmark runs with k values `[10, 20, 50, 100, 500, 1000]` by default. To restrict the run to specific k values, edit `bc_k_values` (line 46), which is passed to pytest as a [keyword expression](https://docs.pytest.org/en/6.2.x/usage.html#specifying-tests-selecting-tests) (see the end-to-end example below).
-
-**Usage:**
- - Run with `--cpu-only`:
- ```bash
- ./run-main-benchmarks.sh --cpu-only
- ```
- - Run with `--gpu-only`:
- ```bash
- ./run-main-benchmarks.sh --gpu-only
- ```
- - Run without any arguments (all backends):
- ```bash
- ./run-main-benchmarks.sh
- ```
-
-#### 2. `create_results_summary_page.py`
-This script is designed to be run after `run-main-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run.
-
-**Usage:**
- ```bash
- python create_results_summary_page.py > report.html
- ```
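
A typical end-to-end benchmarking session runs the pytest-based script for each backend and then renders the report. A minimal sketch, assuming the commands are run from `benchmarks/nx-cugraph/pytest-based/` and that `bc_k_values` has been edited inside the script as described (the filter string shown is illustrative only, not a CLI flag):

```bash
# optional: inside run-main-benchmarks.sh, restrict betweenness_centrality k values,
# e.g. bc_k_values="and not 500 and not 1000"

./run-main-benchmarks.sh --gpu-only   # nx-cugraph (cugraph-preconverted) runs
./run-main-benchmarks.sh --cpu-only   # default NetworkX runs

# summarize the JSON results written to logs/ into an HTML report
python create_results_summary_page.py > report.html
```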
diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py
deleted file mode 100644
index 8852ed2a875..00000000000
--- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py
+++ /dev/null
@@ -1,985 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import random
-
-import networkx as nx
-import pandas as pd
-import pytest
-from cugraph import datasets
-import nx_cugraph as nxcg
-
-# Attempt to import the NetworkX dispatching module, which is only needed when
-# testing with NX <3.2 in order to dynamically switch backends. NX >=3.2 allows
-# the backend to be specified directly in the API call.
-try:
- from networkx.classes import backends # NX <3.2
-except ImportError:
- backends = None
-
-
-################################################################################
-# Fixtures and params
-
-# See https://pytest-benchmark.readthedocs.io/en/latest/glossary.html for how
-# these variables are used.
-rounds = 1
-iterations = 1
-warmup_rounds = 1
-
-# FIXME: Add this to cugraph.datasets. This is done here so these benchmarks
-# can be run without requiring an updated cugraph install. This temporarily
-# adds a dataset based on an Amazon product co-purchasing network.
-amazon0302_metadata = """
-name: amazon0302
-description:
- Network was collected by crawling Amazon website. It is based on Customers Who Bought This Item Also Bought feature of the Amazon website. If a product i is frequently co-purchased with product j, the graph contains a directed edge from i to j. The data was collected in March 02 2003.
-author: J. Leskovec, L. Adamic and B. Adamic
-refs: J. Leskovec, L. Adamic and B. Adamic. The Dynamics of Viral Marketing. ACM Transactions on the Web (ACM TWEB), 1(1), 2007.
-delim: "\t"
-header: 3
-col_names:
- - FromNodeId
- - ToNodeId
-col_types:
- - int32
- - int32
-has_loop: false
-is_directed: true
-is_multigraph: false
-is_symmetric: false
-number_of_edges: 1234877
-number_of_nodes: 262111
-url: https://snap.stanford.edu/data/amazon0302.txt.gz
-"""
-amazon0302_metadata_file_name = datasets.default_download_dir.path / "amazon0302.yaml"
-if not amazon0302_metadata_file_name.exists():
- amazon0302_metadata_file_name.parent.mkdir(parents=True, exist_ok=True)
- with open(amazon0302_metadata_file_name, "w") as f:
- f.write(amazon0302_metadata)
-
-amazon0302_dataset = datasets.Dataset(amazon0302_metadata_file_name)
-amazon0302_dataset.metadata["file_type"] = ".gz"
-
-dataset_param_values = [
- # name: karate, nodes: 34, edges: 156
- pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]),
- # name: netscience, nodes: 1461, edges: 5484
- pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]),
- # name: email-Eu-core, nodes: 1005, edges: 25571
- pytest.param(
- datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed]
- ),
- # name: amazon0302, nodes: 262111, edges: 1234877
- pytest.param(amazon0302_dataset, marks=[pytest.mark.medium, pytest.mark.directed]),
- # name: cit-Patents, nodes: 3774768, edges: 16518948
- pytest.param(
- datasets.cit_patents, marks=[pytest.mark.medium, pytest.mark.directed]
- ),
- # name: hollywood, nodes: 1139905, edges: 57515616
- pytest.param(
- datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected]
- ),
- # name: soc-LiveJournal1, nodes: 4847571, edges: 68993773
- pytest.param(
- datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed]
- ),
- # name: europe_osm, nodes: 50912018, edges: 54054660
- pytest.param(
- datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected]
- ),
-]
-
-backend_param_values = ["cugraph", "cugraph-preconverted", None]
-
-
-def setup_module(module):
- """
- Trivial conversion call to force various one-time CUDA initialization
- operations to happen outside of benchmarks.
- """
- G = nx.karate_club_graph()
- nxcg.from_networkx(G)
-
-
-# Test IDs are generated using the lambda assigned to the ids arg to provide an
-# easier-to-read name. This is especially helpful for Dataset objs (see
-# https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture)
-@pytest.fixture(
- scope="module", params=dataset_param_values, ids=lambda ds: f"ds={str(ds)}"
-)
-def graph_obj(request):
- """
- Returns a NX Graph or DiGraph obj from the dataset instance parameter.
- """
- dataset = request.param
- return nx_graph_from_dataset(dataset)
-
-
-@pytest.fixture(
- scope="module",
- params=backend_param_values,
- ids=lambda backend: f"backend={backend}",
-)
-def backend(request):
- """
- Returns the backend name to use. This is done as a fixture for consistency
- and simplicity when creating benchmarks (no need to mark the benchmark as
- parametrized).
- """
- return request.param
-
-
-################################################################################
-# Helpers
-def nx_graph_from_dataset(dataset_obj):
- """
- Read the dataset specified by the dataset_obj and create and return a
- nx.Graph or nx.DiGraph instance based on the dataset is_directed metadata.
- """
- create_using = nx.DiGraph if dataset_obj.metadata["is_directed"] else nx.Graph
- names = dataset_obj.metadata["col_names"]
- pandas_edgelist = dataset_obj.get_edgelist(download=True, reader="pandas")
- G = nx.from_pandas_edgelist(
- pandas_edgelist, source=names[0], target=names[1], create_using=create_using
- )
- return G
-
-
-def get_legacy_backend_wrapper(backend_name):
- """
- Returns a callable that wraps an algo function with either the default
- dispatcher (which dispatches based on input graph type), or the "testing"
- dispatcher (which autoconverts and unconditionally dispatches).
- This is only supported for NetworkX <3.2
- """
- backends.plugin_name = "cugraph"
- orig_dispatch = backends._dispatch
- testing_dispatch = backends.test_override_dispatch
-
- if backend_name == "cugraph":
- dispatch = testing_dispatch
- else:
- dispatch = orig_dispatch
-
- def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False):
- # Networkx <3.2 registers functions when the dispatch decorator is
- # applied (called) and errors if re-registered, so clear bookkeeping to
- # allow it to be called repeatedly.
- backends._registered_algorithms = {}
- actual_func = dispatch(func) # returns the func the dispatcher picks
-
- def wrapper(*args, **kwargs):
- retval = actual_func(*args, **kwargs)
- if exhaust_returned_iterator:
- retval = list(retval)
- return retval
-
- return wrapper
-
- return wrap_callable_for_dispatch
-
-
-def get_backend_wrapper(backend_name):
- """
- Returns a callable that wraps an algo function in order to set the
- "backend" kwarg on it.
- This is only supported for NetworkX >= 3.2
- """
-
- def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False):
- def wrapper(*args, **kwargs):
- kwargs["backend"] = backend_name
- retval = func(*args, **kwargs)
- if exhaust_returned_iterator:
- retval = list(retval)
- return retval
-
- return wrapper
-
- return wrap_callable_for_dispatch
-
-
-@pytest.fixture(
- scope="module",
- params=backend_param_values,
- ids=lambda backend: f"backend={backend}",
-)
-def backend_wrapper(request):
- """
- Returns a callable that takes a function algo and wraps it in another
- function that calls the algo using the appropriate backend.
-
- For example: if the backend to test is "cugraph", this will return a
- function that calls nx.pagerank(..., backend='cugraph')
- """
- backend_name = request.param
- actual_backend_name = backend_name
-
- # Special case: cugraph-preconverted may be specified as a backend but this
- # name is reserved to indicate a cugraph backend is to be used with a
- # preconverted graph obj (rather than having the backend do the
- # conversion).
- if backend_name == "cugraph-preconverted":
- actual_backend_name = "cugraph"
-
- # NX <3.2 does not support the backends= kwarg, so the backend must be
- # enabled differently
- if backends is not None:
- wrapper = get_legacy_backend_wrapper(actual_backend_name)
- else:
- wrapper = get_backend_wrapper(actual_backend_name)
-
- wrapper.backend_name = backend_name
- return wrapper
-
-
-def get_graph_obj_for_benchmark(graph_obj, backend_wrapper):
- """
- Given a Graph object and a backend name, return a converted Graph or the
- original Graph object based on the backend to use.
-
- This is needed because some backend names are actually used as descriptions
- for combinations of backends and converted/non-converted graphs. For
- example, a benchmark may specify the "cugraph-preconverted" backend, which
- is not an installed backend but instead refers to the "cugraph" backend
- passed a NX Graph that has been converted to a nx-cugraph Graph object.
- """
- G = graph_obj
- if backend_wrapper.backend_name == "cugraph-preconverted":
- G = nxcg.from_networkx(G, preserve_all_attrs=True)
- return G
-
-
-def get_highest_degree_node(graph_obj):
- degrees = graph_obj.degree() # list of tuples of (node, degree)
- return max(degrees, key=lambda t: t[1])[0]
-
-
-def build_personalization_dict(pagerank_dict):
- """
- Returns a dictionary that can be used as the personalization value for a
- call to nx.pagerank(). The pagerank_dict passed in is used as the initial
- source of values for each node, and this function simply treats the list of
- dict values as two halves (halves A and B) and swaps them so (most if not
- all) nodes/keys are assigned a different value from the dictionary.
- """
- num_half = len(pagerank_dict) // 2
- A_half_items = list(pagerank_dict.items())[:num_half]
- B_half_items = list(pagerank_dict.items())[num_half:]
-
- # Support an odd number of items by initializing with B_half_items, which
- # will always be one bigger if the number of items is odd. This will leave
- # the one remainder (in the case of an odd number) unchanged.
- pers_dict = dict(B_half_items)
- pers_dict.update({A_half_items[i][0]: B_half_items[i][1] for i in range(num_half)})
- pers_dict.update({B_half_items[i][0]: A_half_items[i][1] for i in range(num_half)})
-
- return pers_dict
-
-
-################################################################################
-# Benchmarks
-def bench_from_networkx(benchmark, graph_obj):
- benchmark(nxcg.from_networkx, graph_obj)
-
-
-# normalized_param_values = [True, False]
-normalized_param_values = [True]
-k_param_values = [10, 20, 50, 100, 500, 1000]
-
-
-@pytest.mark.parametrize(
- "normalized", normalized_param_values, ids=lambda norm: f"{norm=}"
-)
-@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
-def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- if k > G.number_of_nodes():
- pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")
-
- result = benchmark.pedantic(
- target=backend_wrapper(nx.betweenness_centrality),
- args=(G,),
- kwargs=dict(
- weight=None,
- normalized=normalized,
- k=k,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-@pytest.mark.parametrize(
- "normalized", normalized_param_values, ids=lambda norm: f"{norm=}"
-)
-@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
-def bench_edge_betweenness_centrality(
- benchmark, graph_obj, backend_wrapper, normalized, k
-):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
-
- if k > G.number_of_nodes():
- pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")
-
- result = benchmark.pedantic(
- target=backend_wrapper(nx.edge_betweenness_centrality),
- args=(G,),
- kwargs=dict(
- weight=None,
- normalized=normalized,
- k=k,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_louvain_communities(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.community.louvain_communities),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_degree_centrality(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.degree_centrality),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_eigenvector_centrality(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.eigenvector_centrality),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-@pytest.mark.parametrize(
- "normalized", normalized_param_values, ids=lambda norm: f"{norm=}"
-)
-def bench_hits(benchmark, graph_obj, backend_wrapper, normalized):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.hits),
- args=(G,),
- kwargs=dict(
- normalized=normalized,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is tuple
- assert len(result) == 2
- assert type(result[0]) is dict
- assert type(result[1]) is dict
-
-
-def bench_in_degree_centrality(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.in_degree_centrality),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-@pytest.mark.parametrize(
- "normalized", normalized_param_values, ids=lambda norm: f"{norm=}"
-)
-def bench_katz_centrality(benchmark, graph_obj, backend_wrapper, normalized):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.katz_centrality),
- args=(G,),
- kwargs=dict(
- normalized=normalized,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_k_truss(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.k_truss),
- args=(G,),
- kwargs=dict(
- k=2,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- # Check that this at least appears to be some kind of NX-like Graph
- assert hasattr(result, "has_node")
-
-
-def bench_out_degree_centrality(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.out_degree_centrality),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_pagerank(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.pagerank),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_pagerank_personalized(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
-
- # FIXME: This will run for every combination of inputs, even if the
- # graph/dataset does not change. Ideally this is run once per
- # graph/dataset.
- pagerank_dict = nx.pagerank(G)
- personalization_dict = build_personalization_dict(pagerank_dict)
-
- result = benchmark.pedantic(
- target=backend_wrapper(nx.pagerank),
- args=(G,),
- kwargs={"personalization": personalization_dict},
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
- """
- This passes in the source node with the highest degree, but no target.
- """
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
-
- result = benchmark.pedantic(
- target=backend_wrapper(nx.shortest_path),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
-
- result = benchmark.pedantic(
- target=backend_wrapper(nx.single_source_shortest_path_length),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(
- nx.single_target_shortest_path_length, exhaust_returned_iterator=True
- ),
- args=(G,),
- kwargs=dict(
- target=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- # exhaust_returned_iterator=True forces the result to a list, but is not
- # needed for this algo in NX 3.3+ which returns a dict instead of an
- # iterator. Forcing to a list does not change the benchmark timing.
- assert type(result) is list
-
-
-def bench_ancestors(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.ancestors),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is set
-
-
-def bench_average_clustering(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported by nx-cugraph
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.average_clustering),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is float
-
-
-def bench_generic_bfs_edges(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.generic_bfs_edges, exhaust_returned_iterator=True),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_bfs_edges(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.bfs_edges, exhaust_returned_iterator=True),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_bfs_layers(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.bfs_layers, exhaust_returned_iterator=True),
- args=(G,),
- kwargs=dict(
- sources=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_bfs_predecessors(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.bfs_predecessors, exhaust_returned_iterator=True),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_bfs_successors(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.bfs_successors, exhaust_returned_iterator=True),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_bfs_tree(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.bfs_tree),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- # Check that this at least appears to be some kind of NX-like Graph
- assert hasattr(result, "has_node")
-
-
-def bench_clustering(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported by nx-cugraph
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.clustering),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_core_number(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported by nx-cugraph
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.core_number),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_descendants(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.descendants),
- args=(G,),
- kwargs=dict(
- source=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is set
-
-
-def bench_descendants_at_distance(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.descendants_at_distance),
- args=(G,),
- kwargs=dict(
- source=node,
- distance=1,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is set
-
-
-def bench_is_bipartite(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.is_bipartite),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is bool
-
-
-def bench_is_strongly_connected(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.is_strongly_connected),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is bool
-
-
-def bench_is_weakly_connected(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.is_weakly_connected),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is bool
-
-
-def bench_number_strongly_connected_components(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.number_strongly_connected_components),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is int
-
-
-def bench_number_weakly_connected_components(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.number_weakly_connected_components),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is int
-
-
-def bench_overall_reciprocity(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.overall_reciprocity),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is float
-
-
-def bench_reciprocity(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.reciprocity),
- args=(G,),
- kwargs=dict(
- nodes=node,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is float
-
-
-def bench_strongly_connected_components(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(
- nx.strongly_connected_components, exhaust_returned_iterator=True
- ),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_transitivity(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported by nx-cugraph
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.transitivity),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is float
-
-
-def bench_triangles(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- # DiGraphs are not supported
- if G.is_directed():
- G = G.to_undirected()
- result = benchmark.pedantic(
- target=backend_wrapper(nx.triangles),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is dict
-
-
-def bench_weakly_connected_components(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- result = benchmark.pedantic(
- target=backend_wrapper(
- nx.weakly_connected_components, exhaust_returned_iterator=True
- ),
- args=(G,),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert type(result) is list
-
-
-def bench_ego_graph(benchmark, graph_obj, backend_wrapper):
- G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
- node = get_highest_degree_node(graph_obj)
- result = benchmark.pedantic(
- target=backend_wrapper(nx.ego_graph),
- args=(G,),
- kwargs=dict(
- n=node,
- radius=100,
- ),
- rounds=rounds,
- iterations=iterations,
- warmup_rounds=warmup_rounds,
- )
- assert isinstance(result, (nx.Graph, nxcg.Graph))
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_complete_bipartite_graph(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_connected_components(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_is_connected(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_node_connected_component(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_number_connected_components(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_is_isolate(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_isolates(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_number_of_isolates(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_complement(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_reverse(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_is_arborescence(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_is_branching(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_is_forest(benchmark, graph_obj, backend_wrapper):
- pass
-
-
-@pytest.mark.skip(reason="benchmark not implemented")
-def bench_is_tree(benchmark, graph_obj, backend_wrapper):
- pass
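
The benchmarks above rely on NetworkX backend dispatching; for NetworkX >= 3.2 the `backend_wrapper` fixture effectively reduces to passing the `backend=` keyword, and the "cugraph-preconverted" variant converts the graph once up front. A condensed sketch of those two call patterns (the karate club graph is used only as a stand-in dataset):

```python
import networkx as nx
import nx_cugraph as nxcg

G = nx.karate_club_graph()

# "cugraph": NetworkX converts the graph on each dispatched call
pr = nx.pagerank(G, backend="cugraph")

# "cugraph-preconverted": convert once, then time only the algorithm
Gc = nxcg.from_networkx(G, preserve_all_attrs=True)
pr_pre = nx.pagerank(Gc, backend="cugraph")
```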
diff --git a/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py b/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py
deleted file mode 100644
index e4aff10f0a5..00000000000
--- a/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py
+++ /dev/null
@@ -1,293 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import re
-import pathlib
-import json
-import platform
-import psutil
-import socket
-import subprocess
-
-
-def get_formatted_time_value(time):
- res = ""
- if time < 1:
- if time < 0.001:
- units = "us"
- time *= 1e6
- else:
- units = "ms"
- time *= 1e3
- else:
- units = "s"
- return f"{time:.3f}{units}"
-
-
-def get_all_benchmark_info():
- benchmarks = {}
-    # Populate the benchmarks dict from .json files
- for json_file in logs_dir.glob("*.json"):
- try:
- data = json.loads(open(json_file).read())
- except json.decoder.JSONDecodeError:
- continue
-
- for benchmark_run in data["benchmarks"]:
- # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
- name = benchmark_run["name"]
-
- algo_name = name.split("[")[0]
- if algo_name.startswith("bench_"):
- algo_name = algo_name[6:]
- # special case for betweenness_centrality
- match = k_patt.match(name)
- if match is not None:
- algo_name += f", k={match.group(1)}"
-
- match = dataset_patt.match(name)
- if match is None:
- raise RuntimeError(
- f"benchmark name {name} in file {json_file} has an unexpected format"
- )
- dataset = match.group(1)
- if dataset.endswith("-backend"):
- dataset = dataset[:-8]
-
- match = backend_patt.match(name)
- if match is None:
- raise RuntimeError(
- f"benchmark name {name} in file {json_file} has an unexpected format"
- )
- backend = match.group(1)
- if backend == "None":
- backend = "networkx"
-
- runtime = benchmark_run["stats"]["mean"]
- benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[
- dataset
- ] = runtime
- return benchmarks
-
-
-def compute_perf_vals(cugraph_runtime, networkx_runtime):
- speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"
- delta = networkx_runtime - cugraph_runtime
- if abs(delta) < 1:
- if abs(delta) < 0.001:
- units = "us"
- delta *= 1e6
- else:
- units = "ms"
- delta *= 1e3
- else:
- units = "s"
- delta_string = f"{delta:.3f}{units}"
-
- return (speedup_string, delta_string)
-
-
-def get_mem_info():
- return round(psutil.virtual_memory().total / (1024**3), 2)
-
-
-def get_cuda_version():
-    try:
-        output = subprocess.check_output("nvidia-smi", shell=True).decode()
-        return next(
-            line.split("CUDA Version: ")[1].split()[0]
-            for line in output.splitlines()
-            if "CUDA Version" in line
-        )
-    except (subprocess.CalledProcessError, StopIteration):
-        return "Failed to get CUDA version."
-
-
-def get_first_gpu_info():
- try:
- gpu_info = (
- subprocess.check_output(
- "nvidia-smi --query-gpu=name,memory.total,memory.free,memory.used --format=csv,noheader",
- shell=True,
- )
- .decode()
- .strip()
- )
- if gpu_info:
- gpus = gpu_info.split("\n")
- num_gpus = len(gpus)
- first_gpu = gpus[0] # Get the information for the first GPU
- gpu_name, mem_total, _, _ = first_gpu.split(",")
- return f"{num_gpus} x {gpu_name.strip()} ({round(int(mem_total.strip().split()[0]) / (1024), 2)} GB)"
- else:
- print("No GPU found or unable to query GPU details.")
- except subprocess.CalledProcessError:
- print("Failed to execute nvidia-smi. No GPU information available.")
-
-
-def get_system_info():
- print('
')
- print(f"
Hostname: {socket.gethostname()}
")
- print(
- f'
Operating System: {platform.system()} {platform.release()}
'
- )
- print(f'
Kernel Version : {platform.version()}
')
- with open("/proc/cpuinfo") as f:
- print(
- f'
CPU: {next(line.strip().split(": ")[1] for line in f if "model name" in line)} ({psutil.cpu_count(logical=False)} cores)
'
- )
- print(f'
Memory: {get_mem_info()} GB
')
- print(f"
GPU: {get_first_gpu_info()}
")
- print(f"
CUDA Version: {get_cuda_version()}
")
-
-
-if __name__ == "__main__":
- logs_dir = pathlib.Path("logs")
-
-    dataset_patt = re.compile(r".*ds=([\w-]+).*")
-    backend_patt = re.compile(r".*backend=(\w+).*")
-    k_patt = re.compile(r".*k=(\d+).*")
-
- # Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
- benchmarks = get_all_benchmark_info()
-
- # dump HTML table
- ordered_datasets = [
- "netscience",
- "email_Eu_core",
- "amazon0302",
- "cit-patents",
- "hollywood",
- "soc-livejournal1",
- ]
- # dataset, # Node, # Edge, Directed info
- dataset_meta = {
- "netscience": ["1,461", "5,484", "Yes"],
- "email_Eu_core": ["1,005", "25,571", "Yes"],
- "amazon0302": ["262,111", "1,234,877", "Yes"],
- "cit-patents": ["3,774,768", "16,518,948", "Yes"],
- "hollywood": ["1,139,905", "57,515,616", "No"],
- "soc-livejournal1": ["4,847,571", "68,993,773", "Yes"],
- }
-
-    print(
-        """
-        <html>
-        <head>
-        <style>
-        table, th, td { border: 1px solid black; border-collapse: collapse; padding: 5px; }
-        </style>
-        </head>
-        <body>
-        <table>
-          <tr>
-            <th>Dataset<br>Nodes<br>Edges<br>Directed</th>"""
-    )
-    for ds in ordered_datasets:
-        print(
-            f"            <th>{ds}<br>{dataset_meta[ds][0]}<br>{dataset_meta[ds][1]}<br>{dataset_meta[ds][2]}</th>"
-        )
-    print(
-        """
-          </tr>
-        """
-    )
- for algo_name in sorted(benchmarks):
- algo_runs = benchmarks[algo_name]
-        print("  <tr>")
-        print(f"    <td>{algo_name}</td>")
- # Proceed only if any results are present for both cugraph and NX
- if "cugraph" in algo_runs and "networkx" in algo_runs:
- cugraph_algo_runs = algo_runs["cugraph"]
- networkx_algo_runs = algo_runs["networkx"]
- datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
-
- # populate the table with speedup results for each dataset in the order
- # specified in ordered_datasets. If results for a run using a dataset
- # are not present for both cugraph and NX, output an empty cell.
- for dataset in ordered_datasets:
- if dataset in datasets_in_both:
- cugraph_runtime = cugraph_algo_runs[dataset]
- networkx_runtime = networkx_algo_runs[dataset]
- (speedup, runtime_delta) = compute_perf_vals(
- cugraph_runtime=cugraph_runtime,
- networkx_runtime=networkx_runtime,
- )
- nx_formatted = get_formatted_time_value(networkx_runtime)
- cg_formatted = get_formatted_time_value(cugraph_runtime)
-                    print(
-                        f"            <td>{nx_formatted} / {cg_formatted}<br>{speedup}<br>{runtime_delta}</td>"
-                    )
- else:
-                    print(f"            <td></td>")
-
- # If a comparison between cugraph and NX cannot be made, output empty cells
- # for each dataset
- else:
- for _ in range(len(ordered_datasets)):
-                print("            <td></td>")
-        print("  </tr>")
-    print(
-        """
-        </table>\n
-        <p>Table Format:</p>
-        <ul>
-          <li>NetworkX time / nx-cugraph time</li>
-          <li>Speed-up of using nx-cugraph</li>
-          <li>Time-delta</li>
-        </ul>
-        """
-    )
- get_system_info()
-    print("""</body>\n</html>\n""")
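
The report boils each comparison down to the mean runtimes recorded by pytest-benchmark. A minimal sketch of that calculation for a single algorithm/dataset pair, assuming the `<backend>__<algo>__<dataset>.json` log naming used by `run-main-benchmarks.sh` (the file names shown are examples only):

```python
import json

def mean_runtime(path):
    # pytest-benchmark stores per-benchmark stats under "benchmarks"
    with open(path) as f:
        return json.load(f)["benchmarks"][0]["stats"]["mean"]

nx_mean = mean_runtime("logs/None__pagerank__netscience.json")
cg_mean = mean_runtime("logs/cugraph-preconverted__pagerank__netscience.json")

print(f"speedup: {nx_mean / cg_mean:.3f}X, delta: {nx_mean - cg_mean:.3f}s")
```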
diff --git a/benchmarks/nx-cugraph/pytest-based/run-2402.sh b/benchmarks/nx-cugraph/pytest-based/run-2402.sh
deleted file mode 100755
index 44ed0bda43a..00000000000
--- a/benchmarks/nx-cugraph/pytest-based/run-2402.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2024, NVIDIA CORPORATION.
-#
-# Runs benchmarks for the 24.02 algos.
-# Pass either a or b or both. This is useful for separating batches of runs on different GPUs:
-# CUDA_VISIBLE_DEVICES=1 run-2402.sh b
-
-mkdir -p logs
-
-# The benches="$benches ..." pattern makes it easy to comment out individual runs
-benches=
-
-while [[ $1 != "" ]]; do
- if [[ $1 == "a" ]]; then
- benches="$benches bench_ancestors"
- benches="$benches bench_average_clustering"
- benches="$benches bench_generic_bfs_edges"
- benches="$benches bench_bfs_edges"
- benches="$benches bench_bfs_layers"
- benches="$benches bench_bfs_predecessors"
- benches="$benches bench_bfs_successors"
- benches="$benches bench_bfs_tree"
- benches="$benches bench_clustering"
- benches="$benches bench_core_number"
- benches="$benches bench_descendants"
- elif [[ $1 == "b" ]]; then
- benches="$benches bench_descendants_at_distance"
- benches="$benches bench_is_bipartite"
- benches="$benches bench_is_strongly_connected"
- benches="$benches bench_is_weakly_connected"
- benches="$benches bench_number_strongly_connected_components"
- benches="$benches bench_number_weakly_connected_components"
- benches="$benches bench_overall_reciprocity"
- benches="$benches bench_reciprocity"
- benches="$benches bench_strongly_connected_components"
- benches="$benches bench_transitivity"
- benches="$benches bench_triangles"
- benches="$benches bench_weakly_connected_components"
- fi
- shift
-done
-
-for bench in $benches; do
- pytest -sv -k "soc-livejournal1" "bench_algos.py::$bench" 2>&1 | tee "logs/${bench}.log"
-done
diff --git a/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh b/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh
deleted file mode 100755
index 73c85000b0f..00000000000
--- a/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2024, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# location to store datasets used for benchmarking
-export RAPIDS_DATASET_ROOT_DIR=${RAPIDS_DATASET_ROOT_DIR:-/datasets/cugraph}
-mkdir -p logs
-
-# list of algos, datasets, and back-ends to use in combinations
-algos="
- pagerank
- betweenness_centrality
- louvain
- shortest_path
- weakly_connected_components
- triangles
- bfs_predecessors
-"
-datasets="
- netscience
- email_Eu_core
- amazon0302
- cit-patents
- hollywood
- soc-livejournal
-"
-# "None" backend is default NetworkX
-# "cugraph-preconverted" backend is nx-cugraph
-backends="
- None
- cugraph-preconverted
-"
-
-# edit this directly to add a k-value filter to the pytest -k expression,
-# e.g. "and not 100 and not 1000"
-bc_k_values=""
-
-# check for --cpu-only or --gpu-only args
-if [[ "$#" -eq 1 ]]; then
- case $1 in
- --cpu-only)
- backends="None"
- ;;
- --gpu-only)
- backends="cugraph-preconverted"
- ;;
- *)
- echo "Unknown option: $1"
- exit 1
- ;;
- esac
-fi
-
-for algo in $algos; do
- for dataset in $datasets; do
- for backend in $backends; do
- name="${backend}__${algo}__${dataset}"
- echo "Running: $backend, $dataset, bench_$algo"
-
- # uncomment to get command for reproducing test
- # echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo $bc_k_values\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"
-
- pytest -sv \
- -k "$backend and $dataset and bench_$algo $bc_k_values" \
- --benchmark-json="logs/${name}.json" \
- bench_algos.py 2>&1 | tee "logs/${name}.out"
- done
- done
-done
diff --git a/build.sh b/build.sh
index 29abd48166a..1ab98fe4378 100755
--- a/build.sh
+++ b/build.sh
@@ -32,7 +32,6 @@ VALIDARGS="
cugraph-pyg
cugraph-dgl
cugraph-equivariant
- nx-cugraph
cpp-mgtests
cpp-mtmgtests
docs
@@ -61,7 +60,6 @@ HELP="$0 [<target> ...] [<flag> ...]
cugraph-pyg - build the cugraph-pyg Python package
cugraph-dgl - build the cugraph-dgl extensions for DGL
cugraph-equivariant - build the cugraph-equivariant Python package
- nx-cugraph - build the nx-cugraph Python package
cpp-mgtests - build libcugraph and libcugraph_etl MG tests. Builds MPI communicator, adding MPI as a dependency.
cpp-mtmgtests - build libcugraph MTMG tests. Adds UCX as a dependency (temporary).
docs - build the docs
@@ -212,7 +210,7 @@ if hasArg uninstall; then
# removes the latest one and leaves the others installed. build.sh uninstall
# can be run multiple times to remove all of them, but that is not obvious.
pip uninstall -y pylibcugraph cugraph cugraph-service-client cugraph-service-server \
- cugraph-dgl cugraph-pyg cugraph-equivariant nx-cugraph
+ cugraph-dgl cugraph-pyg cugraph-equivariant
fi
if hasArg clean; then
@@ -357,15 +355,6 @@ if hasArg cugraph-equivariant || hasArg all; then
fi
fi
-# Build and install the nx-cugraph Python package
-if hasArg nx-cugraph || hasArg all; then
- if hasArg --clean; then
- cleanPythonDir ${REPODIR}/python/nx-cugraph
- else
- python ${PYTHON_ARGS_FOR_INSTALL} ${REPODIR}/python/nx-cugraph
- fi
-fi
-
# Build the docs
if hasArg docs || hasArg all; then
if [ ! -d ${LIBCUGRAPH_BUILD_DIR} ]; then
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index ec3f61d383f..e4269707168 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -34,7 +34,6 @@ dependencies:
- nbsphinx
- nccl>=2.19
- networkx>=2.5.1
-- networkx>=3.0
- ninja
- notebook>=0.5.0
- numba>=0.57
@@ -53,7 +52,6 @@ dependencies:
- pytest
- pytest-benchmark
- pytest-cov
-- pytest-mpl
- pytest-xdist
- python-louvain
- pytorch>=2.3,<2.4.0a0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index ff42bbbc365..eb2625b9d50 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -40,7 +40,6 @@ dependencies:
- nbsphinx
- nccl>=2.19
- networkx>=2.5.1
-- networkx>=3.0
- ninja
- notebook>=0.5.0
- numba>=0.57
@@ -58,7 +57,6 @@ dependencies:
- pytest
- pytest-benchmark
- pytest-cov
-- pytest-mpl
- pytest-xdist
- python-louvain
- pytorch>=2.3,<2.4.0a0
diff --git a/conda/recipes/nx-cugraph/build.sh b/conda/recipes/nx-cugraph/build.sh
deleted file mode 100644
index 26665c1e76a..00000000000
--- a/conda/recipes/nx-cugraph/build.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-# This assumes the script is executed from the root of the repo directory
-
-./build.sh nx-cugraph
diff --git a/conda/recipes/nx-cugraph/meta.yaml b/conda/recipes/nx-cugraph/meta.yaml
deleted file mode 100644
index 263f53d9a8f..00000000000
--- a/conda/recipes/nx-cugraph/meta.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-
-{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
-{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
-{% set py_version = environ['CONDA_PY'] %}
-{% set date_string = environ['RAPIDS_DATE_STRING'] %}
-
-package:
- name: nx-cugraph
- version: {{ version }}
-
-source:
- path: ../../..
-
-build:
- number: {{ GIT_DESCRIBE_NUMBER }}
- string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
-
-requirements:
- host:
- - python
- - rapids-build-backend>=0.3.1,<0.4.0.dev0
- - setuptools>=61.0.0
- run:
- - pylibcugraph ={{ version }}
- - networkx >=3.0
- - cupy >=12.0.0
- - python
-
-tests:
- imports:
- - nx_cugraph
- commands:
- - pip check
- requires:
- - pip
-
-about:
- home: https://rapids.ai/
- dev_url: https://github.com/rapidsai/cugraph
- license: Apache-2.0
- license_file: ../../../LICENSE
- summary: cuGraph backend for GPU-accelerated NetworkX
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2cea2e504ab..27e1999cb75 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -167,6 +167,7 @@ set(CUGRAPH_SOURCES
src/detail/groupby_and_count_mg_v64_e64.cu
src/detail/collect_comm_wrapper_mg_v32_e32.cu
src/detail/collect_comm_wrapper_mg_v64_e64.cu
+ src/sampling/detail/conversion_utilities.cu
src/sampling/random_walks_mg_v64_e64.cu
src/sampling/random_walks_mg_v32_e32.cu
src/community/detail/common_methods_mg_v64_e64.cu
@@ -264,10 +265,10 @@ set(CUGRAPH_SOURCES
src/sampling/detail/sample_edges_mg_v32_e32.cu
src/sampling/detail/shuffle_and_organize_output_mg_v64_e64.cu
src/sampling/detail/shuffle_and_organize_output_mg_v32_e32.cu
- src/sampling/neighbor_sampling_mg_v32_e32.cpp
- src/sampling/neighbor_sampling_mg_v64_e64.cpp
- src/sampling/neighbor_sampling_sg_v32_e32.cpp
- src/sampling/neighbor_sampling_sg_v64_e64.cpp
+ src/sampling/neighbor_sampling_mg_v32_e32.cu
+ src/sampling/neighbor_sampling_mg_v64_e64.cu
+ src/sampling/neighbor_sampling_sg_v32_e32.cu
+ src/sampling/neighbor_sampling_sg_v64_e64.cu
src/sampling/negative_sampling_sg_v32_e32.cu
src/sampling/negative_sampling_sg_v64_e64.cu
src/sampling/negative_sampling_mg_v32_e32.cu
diff --git a/cpp/include/cugraph/detail/utility_wrappers.hpp b/cpp/include/cugraph/detail/utility_wrappers.hpp
index 3d99b85556b..b1afeafd66b 100644
--- a/cpp/include/cugraph/detail/utility_wrappers.hpp
+++ b/cpp/include/cugraph/detail/utility_wrappers.hpp
@@ -65,6 +65,48 @@ void uniform_random_fill(rmm::cuda_stream_view const& stream_view,
template <typename value_t>
void scalar_fill(raft::handle_t const& handle, value_t* d_value, size_t size, value_t value);
+/**
+ * @brief Sort a device span
+ *
+ * @tparam value_t type of the value to operate on. Must be either int32_t or int64_t.
+ *
+ * @param [in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
+ * and handles to various CUDA libraries) to run graph algorithms.
+ * @param[out] values device span to sort
+ *
+ */
+template <typename value_t>
+void sort_ints(raft::handle_t const& handle, raft::device_span<value_t> values);
+
+/**
+ * @brief Keep the unique elements of a device span
+ *
+ * @tparam value_t type of the value to operate on. Must be either int32_t or int64_t.
+ *
+ * @param [in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
+ * and handles to various CUDA libraries) to run graph algorithms.
+ * @param[in] values device span of elements to deduplicate.
+ * @return the number of unique elements.
+ *
+ */
+template <typename value_t>
+size_t unique_ints(raft::handle_t const& handle, raft::device_span<value_t> values);
+
+/**
+ * @brief Increment the values of a device span by a constant value
+ *
+ * @tparam value_t type of the value to operate on. Must be either int32_t or int64_t.
+ *
+ * @param[out] values device span to update
+ * @param[in] value value to be added to each element of the buffer
+ * @param[in] stream_view stream view
+ *
+ */
+template <typename value_t>
+void transform_increment_ints(raft::device_span<value_t> values,
+                              value_t value,
+                              rmm::cuda_stream_view const& stream_view);
+
/**
* @brief Fill a buffer with a sequence of values
*
@@ -73,7 +115,7 @@ void scalar_fill(raft::handle_t const& handle, value_t* d_value, size_t size, va
*
* Similar to the function std::iota, wraps the function thrust::sequence
*
- * @tparam value_t type of the value to operate on
+ * @tparam value_t type of the value to operate on.
*
* @param[in] stream_view stream view
* @param[out] d_value device array to fill
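
The three helpers added above are declared as simple span-based utilities; a hypothetical call sequence, assuming they live in the `cugraph::detail` namespace alongside the other wrappers in this header (the surrounding function and the increment value are illustrative only):

```cpp
#include <cugraph/detail/utility_wrappers.hpp>

#include <raft/core/device_span.hpp>
#include <raft/core/handle.hpp>

#include <rmm/device_uvector.hpp>

// Sort a buffer of ids, drop duplicates, then shift the survivors by a constant.
void normalize_ids(raft::handle_t const& handle, rmm::device_uvector<int32_t>& ids)
{
  raft::device_span<int32_t> values{ids.data(), ids.size()};

  cugraph::detail::sort_ints(handle, values);
  size_t num_unique = cugraph::detail::unique_ints(handle, values);
  ids.resize(num_unique, handle.get_stream());

  cugraph::detail::transform_increment_ints(
    raft::device_span<int32_t>{ids.data(), ids.size()}, int32_t{1}, handle.get_stream());
}
```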
diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp
index 783cd3a7e2b..3d41e954416 100644
--- a/cpp/include/cugraph/sampling_functions.hpp
+++ b/cpp/include/cugraph/sampling_functions.hpp
@@ -43,6 +43,8 @@ enum class prior_sources_behavior_t { DEFAULT = 0, CARRY_OVER, EXCLUDE };
/**
* @brief Uniform Neighborhood Sampling.
*
+ * @deprecated Replaced with homogeneous_uniform_neighbor_sample
+ *
* This function traverses from a set of starting vertices, traversing outgoing edges and
* randomly selects from these outgoing neighbors to extract a subgraph.
*
@@ -53,19 +55,20 @@ enum class prior_sources_behavior_t { DEFAULT = 0, CARRY_OVER, EXCLUDE };
* encountered in. The label output (optional) identifies the vertex label. The offsets array
* (optional) will be described below and is dependent upon the input parameters.
*
- * If @p starting_vertex_labels is not specified then no organization is applied to the output, the
- * label and offsets values in the return set will be std::nullopt.
+ * If @p starting_vertex_label_offsets is not specified then no organization is applied to the
+ * output, the label and offsets values in the return set will be std::nullopt.
*
- * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is not specified then
- * the label output has values. This will also result in the output being sorted by vertex label.
- * The offsets array in the return will be a CSR-style offsets array to identify the beginning of
- * each label range in the data. `labels.size() == (offsets.size() - 1)`.
+ * If @p starting_vertex_label_offsets is specified and @p label_to_output_comm_rank is not
+ * specified then the label output has values. This will also result in the output being sorted by
+ * vertex label. The offsets array in the return will be a CSR-style offsets array to identify the
+ * beginning of each label range in the data. `labels.size() == (offsets.size() - 1)`.
*
- * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is specified then the
- * label output has values. This will also result in the output being sorted by vertex label. The
- * offsets array in the return will be a CSR-style offsets array to identify the beginning of each
- * label range in the data. `labels.size() == (offsets.size() - 1)`. Additionally, the data will
- * be shuffled so that all data with a particular label will be on the specified rank.
+ * If @p starting_vertex_label_offsets is specified and @p label_to_output_comm_rank is specified
+ * then the label output has values. This will also result in the output being sorted by vertex
+ * label. The offsets array in the return will be a CSR-style offsets array to identify the
+ * beginning of each label range in the data. `labels.size() == (offsets.size() - 1)`.
+ * Additionally, the data will be shuffled so that all data with a particular label will be on the
+ * specified rank.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
@@ -83,8 +86,8 @@ enum class prior_sources_behavior_t { DEFAULT = 0, CARRY_OVER, EXCLUDE };
* @param edge_type_view Optional view object holding edge types for @p graph_view.
* @param starting_vertices Device span of starting vertex IDs for the sampling.
* In a multi-gpu context the starting vertices should be local to this GPU.
- * @param starting_vertex_labels Optional device span of labels associted with each starting vertex
- * for the sampling.
+ * @param starting_vertex_label_offsets Optional device span of labels associated with each starting
+ * vertex for the sampling.
* @param label_to_output_comm_rank Optional tuple of device spans mapping label to a particular
* output rank. Element 0 of the tuple identifes the label, Element 1 of the tuple identifies the
* output rank. The label span must be sorted in ascending order.
@@ -126,7 +129,7 @@ uniform_neighbor_sample(
std::optional> edge_id_view,
std::optional> edge_type_view,
raft::device_span starting_vertices,
- std::optional> starting_vertex_labels,
+ std::optional> starting_vertex_label_offsets,
std::optional, raft::device_span>>
label_to_output_comm_rank,
raft::host_span fan_out,
@@ -140,6 +143,8 @@ uniform_neighbor_sample(
/**
* @brief Biased Neighborhood Sampling.
*
+ * @deprecated Replaced with homogeneous_biased_neighbor_sample
+ *
* This function traverses from a set of starting vertices, traversing outgoing edges and
* randomly selects (with edge biases) from these outgoing neighbors to extract a subgraph.
*
@@ -150,24 +155,26 @@ uniform_neighbor_sample(
* encountered in. The label output (optional) identifes the vertex label. The offsets array
* (optional) will be described below and is dependent upon the input parameters.
*
- * If @p starting_vertex_labels is not specified then no organization is applied to the output, the
- * label and offsets values in the return set will be std::nullopt.
+ * If @p starting_vertex_label_offsets is not specified then no organization is applied to the
+ * output, the label and offsets values in the return set will be std::nullopt.
*
- * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is not specified then
- * the label output has values. This will also result in the output being sorted by vertex label.
- * The offsets array in the return will be a CSR-style offsets array to identify the beginning of
- * each label range in the data. `labels.size() == (offsets.size() - 1)`.
+ * If @p starting_vertex_label_offsets is specified and @p label_to_output_comm_rank is not
+ * specified then the label output has values. This will also result in the output being sorted by
+ * vertex label. The offsets array in the return will be a CSR-style offsets array to identify the
+ * beginning of each label range in the data. `labels.size() == (offsets.size() - 1)`.
*
- * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is specified then the
- * label output has values. This will also result in the output being sorted by vertex label. The
- * offsets array in the return will be a CSR-style offsets array to identify the beginning of each
- * label range in the data. `labels.size() == (offsets.size() - 1)`. Additionally, the data will
- * be shuffled so that all data with a particular label will be on the specified rank.
+ * If @p starting_vertex_label_offsets is specified and @p label_to_output_comm_rank is specified
+ * then the label output has values. This will also result in the output being sorted by vertex
+ * label. The offsets array in the return will be a CSR-style offsets array to identify the
+ * beginning of each label range in the data. `labels.size() == (offsets.size() - 1)`.
+ * Additionally, the data will be shuffled so that all data with a particular label will be on the
+ * specified rank.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ * @tparam bias_t Type of bias. Needs to be a floating point type.
* @tparam label_t Type of label. Needs to be an integral type.
* @tparam store_transposed Flag indicating whether sources (if false) or destinations (if
* true) are major indices
@@ -184,8 +191,8 @@ uniform_neighbor_sample(
* corresponding edge can never be selected.
* @param starting_vertices Device span of starting vertex IDs for the sampling.
* In a multi-gpu context the starting vertices should be local to this GPU.
- * @param starting_vertex_labels Optional device span of labels associted with each starting vertex
- * for the sampling.
+ * @param starting_vertex_label_offsets Optional device span of labels associated with each starting
+ * vertex for the sampling.
* @param label_to_output_comm_rank Optional tuple of device spans mapping label to a particular
* output rank. Element 0 of the tuple identifes the label, Element 1 of the tuple identifies the
* output rank. The label span must be sorted in ascending order.
@@ -229,7 +236,7 @@ biased_neighbor_sample(
std::optional> edge_type_view,
edge_property_view_t edge_bias_view,
raft::device_span starting_vertices,
- std::optional> starting_vertex_labels,
+ std::optional> starting_vertex_label_offsets,
std::optional, raft::device_span>>
label_to_output_comm_rank,
raft::host_span fan_out,
@@ -240,6 +247,349 @@ biased_neighbor_sample(
bool dedupe_sources = false,
bool do_expensive_check = false);
+struct sampling_flags_t {
+ /**
+ * Specifies how to handle prior sources. Default is DEFAULT.
+ */
+ prior_sources_behavior_t prior_sources_behavior{};
+
+ /**
+ * Specifies if the hop information should be returned. Default is false.
+ */
+ bool return_hops{false};
+
+ /**
+ * If true then if a vertex v appears as a destination in hop X multiple times
+ * with the same label, it will only be passed once (for each label) as a source
+ * for the next hop. Default is false.
+ */
+ bool dedupe_sources{false};
+
+ /**
+ * Specifies if random sampling is done with replacement
+ * (true) or without replacement (false). Default is true.
+ */
+ bool with_replacement{true};
+};
+
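For reference, the struct can be filled with ordinary aggregate initialization in member order, mirroring how the C API layer constructs it later in this change; a minimal sketch (the wrapper function name is hypothetical):

```cpp
#include <cugraph/sampling_functions.hpp>

// Sketch: positional aggregate initialization, following the member order above
// (prior_sources_behavior, return_hops, dedupe_sources, with_replacement).
cugraph::sampling_flags_t make_flags()
{
  return cugraph::sampling_flags_t{
    cugraph::prior_sources_behavior_t::DEFAULT,  // how prior sources are handled
    true,                                        // return per-edge hop information
    false,                                       // do not dedupe sources between hops
    false};                                      // sample without replacement
}
```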
+/**
+ * @brief Homogeneous Uniform Neighborhood Sampling.
+ *
+ * This function traverses from a set of starting vertices, traversing outgoing edges and
+ * randomly selects (uniformly) from these outgoing neighbors to extract a subgraph.
+ * The branching out to select outgoing neighbors is performed with homogeneous fanouts.
+ *
+ * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop,
+ * offsets), identifying the randomly selected edges where the size of src, dst, weight, edge_id,
+ * edge_type and hop is the number of sampled edges while the size of the offsets vector is the
+ * number of labels + 1. src is the source vertex, dst is the destination vertex, weight
+ * (optional) is the edge weight, edge_id (optional) identifies the edge id, edge_type (optional)
+ * identifies the edge type, hop identifies which hop the edge was encountered in.
+ * The offsets array (optional) identifies the offset for each label.
+ *
+ * If @p label_to_output_comm_rank is specified then the data will be shuffled so that all entries
+ * for a particular label are returned on the specified rank.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if
+ * true) are major indices
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param rng_state A pre-initialized raft::RngState object for generating random numbers
+ * @param graph_view Graph View object to generate NBR Sampling on.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view.
+ * @param edge_id_view Optional view object holding edge ids for @p graph_view.
+ * @param edge_type_view Optional view object holding edge types for @p graph_view.
+ * @param starting_vertices Device span of starting vertex IDs for the sampling.
+ * In a multi-gpu context the starting vertices should be local to this GPU.
+ * @param starting_vertex_label_offsets Optional device span of labels associated with each starting
+ * vertex for the sampling.
+ * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling
+ * outputs of each vertex label. This should be the same on each rank.
+ * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each
+ * level.
+ * @param sampling_flags A set of flags indicating which sampling features should be used.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple of device vectors (vertex_t source_vertex, vertex_t destination_vertex,
+ * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type,
+ * optional int32_t hop, optional size_t offsets)
+ */
+
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>>
+homogeneous_uniform_neighbor_sample(
+ raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
+ graph_view_t const& graph_view,
+ std::optional> edge_weight_view,
+ std::optional> edge_id_view,
+ std::optional> edge_type_view,
+ raft::device_span starting_vertices,
+ std::optional> starting_vertex_label_offsets,
+ std::optional> label_to_output_comm_rank,
+ raft::host_span fan_out,
+ sampling_flags_t sampling_flags,
+ bool do_expensive_check = false);
+
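The CSR-style offsets returned by these functions can be consumed as in the following simplified host-side sketch (plain `std::vector` standing in for the returned device vectors; the helper is hypothetical):

```cpp
#include <cstddef>
#include <vector>

// Sketch: with an offsets array of size num_labels + 1, the sampled edges for
// label i occupy the index range [offsets[i], offsets[i + 1]) of src/dst/....
std::vector<std::size_t> edges_per_label(std::vector<std::size_t> const& offsets)
{
  std::vector<std::size_t> counts;
  for (std::size_t label = 0; label + 1 < offsets.size(); ++label) {
    counts.push_back(offsets[label + 1] - offsets[label]);
  }
  return counts;  // e.g. offsets {0, 4, 9} -> counts {4, 5}
}
```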
+/**
+ * @brief Homogeneous Biased Neighborhood Sampling.
+ *
+ * This function traverses from a set of starting vertices, traversing outgoing edges and
+ * randomly selects (with edge biases) from these outgoing neighbors to extract a subgraph.
+ * The branching out to select outgoing neighbors is performed with homogeneous fanouts.
+ *
+ * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop,
+ * offsets), identifying the randomly selected edges where the size of src, dst, weight, edge_id,
+ * edge_type and hop is the number of sampled edges while the size of the offsets vector is the
+ * number of labels + 1. src is the source vertex, dst is the destination vertex, weight
+ * (optional) is the edge weight, edge_id (optional) identifies the edge id, edge_type (optional)
+ * identifies the edge type, hop identifies which hop the edge was encountered in.
+ * The offsets array (optional) identifies the offset for each label.
+ *
+ * If @p label_to_output_comm_rank is specified then the data will be shuffled so that all entries
+ * for a particular label are returned on the specified rank.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ * @tparam bias_t Type of bias. Needs to be a floating point type.
+ * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if
+ * true) are major indices
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param rng_state A pre-initialized raft::RngState object for generating random numbers
+ * @param graph_view Graph View object to generate NBR Sampling on.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view.
+ * @param edge_id_view Optional view object holding edge ids for @p graph_view.
+ * @param edge_type_view Optional view object holding edge types for @p graph_view.
+ * @param edge_bias_view View object holding edge biases (to be used in biased sampling) for @p
+ * graph_view. Bias values should be non-negative and the sum of edge bias values from any vertex
+ * should not exceed std::numeric_limits<bias_t>::max(). A bias value of 0 indicates that the
+ * corresponding edge can never be selected.
+ * @param starting_vertices Device span of starting vertex IDs for the sampling.
+ * In a multi-gpu context the starting vertices should be local to this GPU.
+ * @param starting_vertex_label_offsets Optional device span of labels associated with each starting
+ * vertex for the sampling.
+ * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling
+ * outputs of each vertex label. This should be the same on each rank.
+ * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each
+ * level.
+ * @param sampling_flags A set of flags indicating which sampling features should be used.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple of device vectors (vertex_t source_vertex, vertex_t destination_vertex,
+ * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type,
+ * optional int32_t hop, optional size_t offsets)
+ */
+
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>>
+homogeneous_biased_neighbor_sample(
+ raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
+ graph_view_t const& graph_view,
+ std::optional> edge_weight_view,
+ std::optional> edge_id_view,
+ std::optional> edge_type_view,
+ edge_property_view_t edge_bias_view,
+ raft::device_span starting_vertices,
+ std::optional> starting_vertex_label_offsets,
+ std::optional> label_to_output_comm_rank,
+ raft::host_span fan_out,
+ sampling_flags_t sampling_flags,
+ bool do_expensive_check = false);
+
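The bias contract above (non-negative values, zero meaning the edge is never selected) corresponds to ordinary weighted sampling. A host-side analogue for illustration only, not the library's device implementation:

```cpp
#include <cstddef>
#include <random>
#include <vector>

// Sketch: pick one neighbor index according to non-negative edge biases.
// A bias of 0 means the corresponding edge is never selected.
std::size_t pick_neighbor(std::vector<double> const& biases, std::mt19937& gen)
{
  std::discrete_distribution<std::size_t> dist(biases.begin(), biases.end());
  return dist(gen);
}
```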
+/**
+ * @brief Heterogeneous Uniform Neighborhood Sampling.
+ *
+ * This function traverses from a set of starting vertices, traversing outgoing edges and
+ * randomly selects (uniformly) from these outgoing neighbors to extract a subgraph.
+ * The branching out to select outgoing neighbors is performed with heterogeneous fanouts,
+ * where the number of edge types is greater than 1.
+ *
+ * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop,
+ * offsets), identifying the randomly selected edges where the size of src, dst, weight, edge_id,
+ * edge_type and hop is the number of sampled edges while the size of the offsets vector is the
+ * number of labels + 1. src is the source vertex, dst is the destination vertex, weight
+ * (optional) is the edge weight, edge_id (optional) identifies the edge id, edge_type (optional)
+ * identifies the edge type, hop identifies which hop the edge was encountered in.
+ * The offsets array (optional) identifies the offset for each label.
+ *
+ * If @p label_to_output_comm_rank is specified then the data will be shuffled so that all entries
+ * for a particular label are returned on the specified rank.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if
+ * true) are major indices
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param rng_state A pre-initialized raft::RngState object for generating random numbers
+ * @param graph_view Graph View object to generate NBR Sampling on.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view.
+ * @param edge_id_view Optional view object holding edge ids for @p graph_view.
+ * @param edge_type_view Optional view object holding edge types for @p graph_view.
+ * @param starting_vertices Device span of starting vertex IDs for the sampling.
+ * In a multi-gpu context the starting vertices should be local to this GPU.
+ * @param starting_vertex_label_offsets Optional device span of labels associated with each starting
+ * vertex for the sampling.
+ * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling
+ * outputs of each vertex label. This should be the same on each rank.
+ * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each
+ * level. The fanout value at hop x is given by the expression 'fanout[x*num_edge_types +
+ * edge_type_id]'
+ * @param num_edge_types Number of edge types where a value of 1 translates to homogeneous neighbor
+ * sample whereas a value greater than 1 translates to heterogeneous neighbor sample.
+ * @param sampling_flags A set of flags indicating which sampling features should be used.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple of device vectors (vertex_t source_vertex, vertex_t destination_vertex,
+ * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type,
+ * optional int32_t hop, optional size_t offsets)
+ */
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>>
+heterogeneous_uniform_neighbor_sample(
+ raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
+ graph_view_t const& graph_view,
+ std::optional> edge_weight_view,
+ std::optional> edge_id_view,
+ std::optional> edge_type_view,
+ raft::device_span starting_vertices,
+ std::optional> starting_vertex_label_offsets,
+ std::optional> label_to_output_comm_rank,
+ raft::host_span fan_out,
+ edge_type_t num_edge_types,
+ sampling_flags_t sampling_flags,
+ bool do_expensive_check = false);
+
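The flattened heterogeneous fan-out layout, `fanout[x * num_edge_types + edge_type_id]`, can be built and indexed as in this small sketch (example values are made up):

```cpp
#include <cstdint>
#include <vector>

// Sketch: fan_out[hop * num_edge_types + edge_type] is the per-type fan-out at that hop.
int32_t fan_out_at(std::vector<int32_t> const& fan_out,
                   int32_t num_edge_types,
                   int32_t hop,
                   int32_t edge_type)
{
  return fan_out[hop * num_edge_types + edge_type];
}

// Example layout for 2 edge types over 3 hops:
// hop 0 -> {10, 5}, hop 1 -> {10, 5}, hop 2 -> {5, 5}
std::vector<int32_t> const example_fan_out{10, 5, 10, 5, 5, 5};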
+/**
+ * @brief Heterogeneous Biased Neighborhood Sampling.
+ *
+ * This function traverses from a set of starting vertices, traversing outgoing edges and
+ * randomly selects (with edge biases) from these outgoing neighbors to extract a subgraph.
+ * The branching out to select outgoing neighbors is performed with heterogeneous fanouts,
+ * where the number of edge types is greater than 1.
+ *
+ * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop,
+ * offsets), identifying the randomly selected edges where the size of src, dst, weight, edge_id,
+ * edge_type and hop is the number of sampled edges while the size of the offsets vector is the
+ * number of labels + 1. src is the source vertex, dst is the destination vertex, weight
+ * (optional) is the edge weight, edge_id (optional) identifies the edge id, edge_type (optional)
+ * identifies the edge type, hop identifies which hop the edge was encountered in.
+ * The offsets array (optional) identifies the offset for each label.
+ *
+ * If @p label_to_output_comm_rank is specified then the data will be shuffled so that all entries
+ * for a particular label are returned on the specified rank.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam edge_type_t Type of edge type. Needs to be an integral type.
+ * @tparam bias_t Type of bias. Needs to be a floating point type.
+ * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if
+ * true) are major indices
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param rng_state A pre-initialized raft::RngState object for generating random numbers
+ * @param graph_view Graph View object to generate NBR Sampling on.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view.
+ * @param edge_id_view Optional view object holding edge ids for @p graph_view.
+ * @param edge_type_view Optional view object holding edge types for @p graph_view.
+ * @param edge_bias_view View object holding edge biases (to be used in biased sampling) for @p
+ * graph_view. Bias values should be non-negative and the sum of edge bias values from any vertex
+ * should not exceed std::numeric_limits<bias_t>::max(). A bias value of 0 indicates that the
+ * corresponding edge can never be selected.
+ * @param starting_vertices Device span of starting vertex IDs for the sampling.
+ * In a multi-gpu context the starting vertices should be local to this GPU.
+ * @param starting_vertex_label_offsets Optional device span of labels associated with each starting
+ * vertex for the sampling.
+ * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling
+ * outputs of each vertex label. This should be the same on each rank.
+ * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each
+ * level. The fanout value at hop x is given by the expression 'fanout[x*num_edge_types +
+ * edge_type_id]'
+ * @param num_edge_types Number of edge types where a value of 1 translates to homogeneous neighbor
+ * sample whereas a value greater than 1 translates to heterogeneous neighbor sample.
+ * @param sampling_flags A set of flags indicating which sampling features should be used.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple of device vectors (vertex_t source_vertex, vertex_t destination_vertex,
+ * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type,
+ * optional int32_t hop, optional size_t offsets)
+ */
+template
+std::tuple,
+ rmm::device_uvector,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>,
+ std::optional>>
+heterogeneous_biased_neighbor_sample(
+ raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
+ graph_view_t const& graph_view,
+ std::optional> edge_weight_view,
+ std::optional> edge_id_view,
+ std::optional> edge_type_view,
+ edge_property_view_t edge_bias_view,
+ raft::device_span starting_vertices,
+ std::optional> starting_vertex_label_offsets,
+ std::optional> label_to_output_comm_rank,
+ raft::host_span fan_out,
+ edge_type_t num_edge_types,
+ sampling_flags_t sampling_flags,
+ bool do_expensive_check = false);
+
/*
* @brief renumber sampled edge list and compress to the (D)CSR|(D)CSC format.
*
diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h
index bb26e577915..ef75e726d80 100644
--- a/cpp/include/cugraph_c/sampling_algorithms.h
+++ b/cpp/include/cugraph_c/sampling_algorithms.h
@@ -199,6 +199,13 @@ typedef struct {
int32_t align_;
} cugraph_sampling_options_t;
+/**
+ * @brief Opaque sampling flags type
+ */
+typedef struct {
+ int32_t align_;
+} sampling_flags_t;
+
/**
* @brief Enumeration for prior sources behavior
*/
@@ -323,6 +330,8 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options);
/**
* @brief Uniform Neighborhood Sampling
*
+ * @deprecated This API will be deleted, use cugraph_homogeneous_uniform_neighbor_sample
+ *
* Returns a sample of the neighborhood around specified start vertices. Optionally, each
* start vertex can be associated with a label, allowing the caller to specify multiple batches
* of sampling requests in the same function call - which should improve GPU utilization.
@@ -348,8 +357,8 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options);
* label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks.
* @param [in] label_offsets Device array of the offsets for each label in the seed list. This
* parameter is only used with the retain_seeds option.
- * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm.
- * We only support fanout values of type INT32
+ * @param [in] fan_out Host array defining the fan out at each step in the sampling
+ * algorithm. We only support fan_out values of type INT32
* @param [in,out] rng_state State of the random number generator, updated with each call
* @param [in] sampling_options
* Opaque pointer defining the sampling options.
@@ -378,6 +387,8 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
/**
* @brief Biased Neighborhood Sampling
*
+ * @deprecated This API will be deleted, use cugraph_homogeneous_biased_neighbor_sample.
+ *
* Returns a sample of the neighborhood around specified start vertices. Optionally, each
* start vertex can be associated with a label, allowing the caller to specify multiple batches
* of sampling requests in the same function call - which should improve GPU utilization.
@@ -406,8 +417,8 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
* label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks.
* @param [in] label_offsets Device array of the offsets for each label in the seed list. This
* parameter is only used with the retain_seeds option.
- * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm.
- * We only support fanout values of type INT32
+ * @param [in] fan_out Host array defining the fan out at each step in the sampling
+ * algorithm. We only support fan_out values of type INT32
* @param [in,out] rng_state State of the random number generator, updated with each call
* @param [in] sampling_options
* Opaque pointer defining the sampling options.
@@ -434,6 +445,186 @@ cugraph_error_code_t cugraph_biased_neighbor_sample(
cugraph_sample_result_t** result,
cugraph_error_t** error);
+/**
+ * @brief Homogeneous Uniform Neighborhood Sampling
+ *
+ * Returns a sample of the neighborhood around specified start vertices and fan_out.
+ * The neighborhood is sampled uniformly.
+ * Optionally, each start vertex can be associated with a label, allowing the caller to specify
+ * multiple batches of sampling requests in the same function call - which should improve GPU
+ * utilization.
+ *
+ * If starting_vertex_label_offsets is NULL then all start vertices will be considered part of the
+ * same batch and the return value will not have a label column.
+ *
+ * @param [in] handle Handle for accessing resources
+ * @param [in,out] rng_state State of the random number generator, updated with each call
+ * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
+ * needs to be transposed
+ * @param [in] start_vertices Device array of start vertices for the sampling
+ * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in
+ * the seed list. This parameter is only used with the retain_seeds option.
+ * @param [in] fan_out Host array defining the fan out at each step in the sampling
+ * algorithm. We only support fan_out values of type INT32
+ * @param [in] sampling_options
+ * Opaque pointer defining the sampling options.
+ * @param [in] do_expensive_check
+ * A flag to run expensive checks for input arguments (if set to true)
+ * @param [out] result Output from the uniform_neighbor_sample call
+ * @param [out] error Pointer to an error object storing details of any error. Will
+ * be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error);
+
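A minimal calling sketch for the new entry point, assuming the handle, graph, array views, and sampling options were created elsewhere with the existing C API, and using the existing `cugraph_error_message` and `cugraph_sample_result_free` helpers:

```cpp
#include <cugraph_c/sampling_algorithms.h>
#include <stdio.h>

/* Sketch only: all input objects are assumed to have been created with the existing C API. */
int run_homogeneous_uniform_sample(const cugraph_resource_handle_t* handle,
                                   cugraph_rng_state_t* rng_state,
                                   cugraph_graph_t* graph,
                                   const cugraph_type_erased_device_array_view_t* start_vertices,
                                   const cugraph_type_erased_device_array_view_t* label_offsets,
                                   const cugraph_type_erased_host_array_view_t* fan_out,
                                   const cugraph_sampling_options_t* options)
{
  cugraph_sample_result_t* result = NULL;
  cugraph_error_t* error          = NULL;

  cugraph_error_code_t status = cugraph_homogeneous_uniform_neighbor_sample(
    handle, rng_state, graph, start_vertices, label_offsets, fan_out, options,
    FALSE /* do_expensive_check */, &result, &error);

  if (status != CUGRAPH_SUCCESS) {
    fprintf(stderr, "sampling failed: %s\n", cugraph_error_message(error));
    return 1;
  }

  /* result accessors (e.g. cugraph_sample_result_get_majors) can be used here */
  cugraph_sample_result_free(result);
  return 0;
}
```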
+/**
+ * @brief Homogeneous Biased Neighborhood Sampling
+ *
+ * Returns a sample of the neighborhood around specified start vertices and fan_out.
+ * The neighborhood is sampled with edge biases.
+ * Optionally, each start vertex can be associated with a label, allowing the caller to specify
+ * multiple batches of sampling requests in the same function call - which should improve GPU
+ * utilization.
+ *
+ * If starting_vertex_label_offsets is NULL then all start vertices will be considered part of the
+ * same batch and the return value will not have a label column.
+ *
+ * @param [in] handle Handle for accessing resources
+ * @param [in,out] rng_state State of the random number generator, updated with each call
+ * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
+ * needs to be transposed
+ * @param [in] edge_biases Device array of edge biases to use for sampling. If NULL,
+ * the edge weights are used as the biases.
+ * @param [in] start_vertices Device array of start vertices for the sampling
+ * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in
+ * the seed list. This parameter is only used with the retain_seeds option.
+ * @param [in] fan_out Host array defining the fan out at each step in the sampling
+ * algorithm. We only support fan_out values of type INT32
+ * @param [in] sampling_options
+ * Opaque pointer defining the sampling options.
+ * @param [in] do_expensive_check
+ * A flag to run expensive checks for input arguments (if set to true)
+ * @param [out] result Output from the sampling call
+ * @param [out] error Pointer to an error object storing details of any error. Will
+ * be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_edge_property_view_t* edge_biases,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error);
+
+/**
+ * @brief Heterogeneous Uniform Neighborhood Sampling
+ *
+ * Returns a sample of the neighborhood around specified start vertices and fan_out.
+ * The neighborhood is sampled uniformly.
+ * Optionally, each start vertex can be associated with a label, allowing the caller to specify
+ * multiple batches of sampling requests in the same function call - which should improve GPU
+ * utilization.
+ *
+ * If starting_vertex_label_offsets is NULL then all start vertices will be considered part of the
+ * same batch and the return value will not have a label column.
+ *
+ * @param [in] handle Handle for accessing resources
+ * @param [in,out] rng_state State of the random number generator, updated with each call
+ * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
+ * needs to be transposed
+ * @param [in] start_vertices Device array of start vertices for the sampling
+ * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in
+ * the seed list. This parameter is only used with the retain_seeds option.
+ * @param [in] fan_out Host array defining the fan out at each step in the sampling
+ * algorithm. We only support fan_out values of type INT32
+ * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous
+ * neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor sample.
+ * @param [in] sampling_options
+ * Opaque pointer defining the sampling options.
+ * @param [in] do_expensive_check
+ * A flag to run expensive checks for input arguments (if set to true)
+ * @param [out] result Output from the uniform_neighbor_sample call
+ * @param [out] error Pointer to an error object storing details of any error. Will
+ * be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ int num_edge_types,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error);
+
+/**
+ * @brief Heterogeneous Biased Neighborhood Sampling
+ *
+ * Returns a sample of the neighborhood around specified start vertices and fan_out.
+ * The neighborhood is sampled with edge biases.
+ * Optionally, each start vertex can be associated with a label, allowing the caller to specify
+ * multiple batches of sampling requests in the same function call - which should improve GPU
+ * utilization.
+ *
+ * If starting_vertex_label_offsets is NULL then all start vertices will be considered part of the
+ * same batch and the return value will not have a label column.
+ *
+ * @param [in] handle Handle for accessing resources
+ * @param [in,out] rng_state State of the random number generator, updated with each call
+ * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage
+ * needs to be transposed
+ * @param [in] edge_biases Device array of edge biases to use for sampling. If NULL,
+ * the edge weights are used as the biases.
+ * @param [in] start_vertices Device array of start vertices for the sampling
+ * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in
+ * the seed list. This parameter is only used with the retain_seeds option.
+ * @param [in] fan_out Host array defining the fan out at each step in the sampling
+ * algorithm. We only support fan_out values of type INT32
+ * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous
+ * neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor sample.
+ * @param [in] sampling_options
+ * Opaque pointer defining the sampling options.
+ * @param [in] do_expensive_check
+ * A flag to run expensive checks for input arguments (if set to true)
+ * @param [out] result Output from the sampling call
+ * @param [out] error Pointer to an error object storing details of any error. Will
+ * be populated if error code is not CUGRAPH_SUCCESS
+ * @return error code
+ */
+cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_edge_property_view_t* edge_biases,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ int num_edge_types,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error);
+
/**
* @deprecated This call should be replaced with cugraph_sample_result_get_majors
* @brief Get the source vertices from the sampling algorithm result
@@ -584,6 +775,26 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map(
cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_offsets(
const cugraph_sample_result_t* result);
+/**
+ * @ingroup samplingC
+ * @brief Get the edge renumber map
+ *
+ * @param [in] result The result from a sampling algorithm
+ * @return type erased array pointing to the edge renumber map
+ */
+cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_renumber_map(
+ const cugraph_sample_result_t* result);
+
+/**
+ * @ingroup samplingC
+ * @brief Get the edge renumber map offsets
+ *
+ * @param [in] result The result from a sampling algorithm
+ * @return type erased array pointing to the edge renumber map offsets
+ */
+cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_renumber_map_offsets(
+ const cugraph_sample_result_t* result);
+
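A small sketch of reading the new accessors, assuming the existing `cugraph_type_erased_device_array_view_size` helper from the C array API; the accessor is presumed to return NULL when edge renumbering was not requested:

```cpp
#include <stddef.h>

#include <cugraph_c/array.h>
#include <cugraph_c/sampling_algorithms.h>

/* Sketch: return the length of the edge renumber map attached to a sampling result. */
size_t edge_renumber_map_size(const cugraph_sample_result_t* result)
{
  cugraph_type_erased_device_array_view_t* map =
    cugraph_sample_result_get_edge_renumber_map(result);
  if (map == NULL) return 0;
  return cugraph_type_erased_device_array_view_size(map);
}
```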
/**
* @ingroup samplingC
* @brief Free a sampling result
diff --git a/cpp/src/c_api/array.hpp b/cpp/src/c_api/array.hpp
index 048d2ee1cea..0ab30a1cb72 100644
--- a/cpp/src/c_api/array.hpp
+++ b/cpp/src/c_api/array.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -125,6 +125,27 @@ struct cugraph_type_erased_host_array_t {
std::copy(vec.begin(), vec.end(), reinterpret_cast(data_.get()));
}
+ cugraph_type_erased_host_array_t(cugraph_type_erased_host_array_view_t const* view_p)
+ : data_(std::make_unique(view_p->num_bytes_)),
+ size_(view_p->size_),
+ num_bytes_(view_p->num_bytes_),
+ type_(view_p->type_)
+ {
+ std::copy(view_p->data_, view_p->data_ + num_bytes_, data_.get());
+ }
+
+ template <typename T>
+ T* as_type()
+ {
+ return reinterpret_cast<T*>(data_.get());
+ }
+
+ template <typename T>
+ T const* as_type() const
+ {
+ return reinterpret_cast<T const*>(data_.get());
+ }
+
auto view()
{
return new cugraph_type_erased_host_array_view_t{data_.get(), size_, num_bytes_, type_};
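The new `as_type` members simply reinterpret the type-erased byte buffer; the same pattern in a standalone, self-contained sketch:

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>

// Sketch: a type-erased host buffer exposing typed access, mirroring as_type() above.
struct erased_buffer {
  std::unique_ptr<std::byte[]> data_;
  std::size_t num_bytes_;

  template <typename T>
  T* as_type() { return reinterpret_cast<T*>(data_.get()); }

  template <typename T>
  T const* as_type() const { return reinterpret_cast<T const*>(data_.get()); }
};

int main()
{
  erased_buffer buf{std::make_unique<std::byte[]>(4 * sizeof(int32_t)), 4 * sizeof(int32_t)};
  int32_t* values = buf.as_type<int32_t>();
  for (int i = 0; i < 4; ++i) values[i] = i;  // typed writes into the erased storage
  return 0;
}
```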
diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp
index df741a349d2..8778369dbe6 100644
--- a/cpp/src/c_api/graph_functions.cpp
+++ b/cpp/src/c_api/graph_functions.cpp
@@ -84,7 +84,7 @@ struct create_vertex_pairs_functor : public cugraph::c_api::abstract_functor {
std::nullopt,
std::nullopt);
}
-
+ // FIXME: use std::tuple (template) instead.
result_ = new cugraph::c_api::cugraph_vertex_pairs_t{
new cugraph::c_api::cugraph_type_erased_device_array_t(first_copy, graph_->vertex_type_),
new cugraph::c_api::cugraph_type_erased_device_array_t(second_copy, graph_->vertex_type_)};
diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp
index 69306806030..be3a44d813a 100644
--- a/cpp/src/c_api/neighbor_sampling.cpp
+++ b/cpp/src/c_api/neighbor_sampling.cpp
@@ -16,12 +16,15 @@
#include "c_api/abstract_functor.hpp"
#include "c_api/graph.hpp"
+#include "c_api/graph_helper.hpp"
#include "c_api/properties.hpp"
#include "c_api/random.hpp"
#include "c_api/resource_handle.hpp"
#include "c_api/utils.hpp"
+#include "sampling/detail/sampling_utils.hpp"
#include
+#include
#include
#include
@@ -44,6 +47,13 @@ struct cugraph_sampling_options_t {
bool_t retain_seeds_{FALSE};
};
+struct sampling_flags_t {
+ prior_sources_behavior_t prior_sources_behavior_{prior_sources_behavior_t::DEFAULT};
+ bool_t return_hops_{FALSE};
+ bool_t dedupe_sources_{FALSE};
+ bool_t with_replacement_{FALSE};
+};
+
struct cugraph_sample_result_t {
cugraph_type_erased_device_array_t* major_offsets_{nullptr};
cugraph_type_erased_device_array_t* majors_{nullptr};
@@ -56,6 +66,8 @@ struct cugraph_sample_result_t {
cugraph_type_erased_device_array_t* label_{nullptr};
cugraph_type_erased_device_array_t* renumber_map_{nullptr};
cugraph_type_erased_device_array_t* renumber_map_offsets_{nullptr};
+ cugraph_type_erased_device_array_t* edge_renumber_map_{nullptr};
+ cugraph_type_erased_device_array_t* edge_renumber_map_offsets_{nullptr};
};
} // namespace c_api
@@ -63,6 +75,7 @@ struct cugraph_sample_result_t {
namespace {
+// Deprecated functor
struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_functor {
raft::handle_t const& handle_;
cugraph::c_api::cugraph_graph_t* graph_{nullptr};
@@ -398,11 +411,14 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct
: nullptr,
(renumber_map_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(
renumber_map_offsets.value(), SIZE_T)
- : nullptr};
+ : nullptr,
+ nullptr,
+ nullptr};
}
}
};
+// Deprecated functor
struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functor {
raft::handle_t const& handle_;
cugraph::c_api::cugraph_graph_t* graph_{nullptr};
@@ -748,7 +764,598 @@ struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functo
: nullptr,
(renumber_map_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(
renumber_map_offsets.value(), SIZE_T)
- : nullptr};
+ : nullptr,
+ nullptr,
+ nullptr};
+ }
+ }
+};
+
+struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor {
+ raft::handle_t const& handle_;
+ cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
+ cugraph::c_api::cugraph_graph_t* graph_{nullptr};
+ cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr};
+ cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
+ cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_offsets_{nullptr};
+ cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr};
+ int num_edge_types_{};
+ cugraph::c_api::cugraph_sampling_options_t options_{};
+ bool is_biased_{false};
+ bool do_expensive_check_{false};
+ cugraph::c_api::cugraph_sample_result_t* result_{nullptr};
+
+ neighbor_sampling_functor(cugraph_resource_handle_t const* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ cugraph_edge_property_view_t const* edge_biases,
+ cugraph_type_erased_device_array_view_t const* start_vertices,
+ cugraph_type_erased_device_array_view_t const* start_vertex_offsets,
+ cugraph_type_erased_host_array_view_t const* fan_out,
+ int num_edge_types,
+ cugraph::c_api::cugraph_sampling_options_t options,
+ bool is_biased,
+ bool do_expensive_check)
+ : abstract_functor(),
+ handle_(*reinterpret_cast(handle)->handle_),
+ rng_state_(reinterpret_cast(rng_state)),
+ graph_(reinterpret_cast(graph)),
+ edge_biases_(
+ reinterpret_cast(edge_biases)),
+ start_vertices_(
+ reinterpret_cast(
+ start_vertices)),
+ start_vertex_offsets_(
+ reinterpret_cast(
+ start_vertex_offsets)),
+ fan_out_(
+ reinterpret_cast(fan_out)),
+ num_edge_types_(num_edge_types),
+ options_(options),
+ is_biased_(is_biased),
+ do_expensive_check_(do_expensive_check)
+ {
+ }
+
+ template
+ void operator()()
+ {
+ using label_t = int32_t;
+
+ // FIXME: Think about how to handle SG vice MG
+ if constexpr (!cugraph::is_candidate::value) {
+ unsupported();
+ } else {
+ // uniform_nbr_sample expects store_transposed == false
+ if constexpr (store_transposed) {
+ error_code_ = cugraph::c_api::
+ transpose_storage(
+ handle_, graph_, error_.get());
+ if (error_code_ != CUGRAPH_SUCCESS) return;
+ }
+
+ auto graph =
+ reinterpret_cast*>(graph_->graph_);
+
+ auto graph_view = graph->view();
+
+ auto edge_weights = reinterpret_cast<
+ cugraph::edge_property_t,
+ weight_t>*>(graph_->edge_weights_);
+
+ auto edge_ids = reinterpret_cast<
+ cugraph::edge_property_t,
+ edge_t>*>(graph_->edge_ids_);
+
+ auto edge_types = reinterpret_cast<
+ cugraph::edge_property_t,
+ edge_type_t>*>(graph_->edge_types_);
+
+ auto number_map = reinterpret_cast*>(graph_->number_map_);
+
+ auto edge_biases =
+ edge_biases_ ? reinterpret_cast*>(
+ edge_biases_->edge_property_)
+ : nullptr;
+
+ rmm::device_uvector start_vertices(start_vertices_->size_, handle_.get_stream());
+ raft::copy(start_vertices.data(),
+ start_vertices_->as_type(),
+ start_vertices.size(),
+ handle_.get_stream());
+
+ std::optional> start_vertex_labels{std::nullopt};
+ std::optional> local_label_to_comm_rank{std::nullopt};
+ std::optional> label_to_comm_rank{
+ std::nullopt}; // global after allgatherv
+
+ std::optional> renumbered_and_sorted_edge_id_renumber_map(
+ std::nullopt);
+ std::optional>
+ renumbered_and_sorted_edge_id_renumber_map_label_type_offsets(std::nullopt);
+
+ if (start_vertex_offsets_ != nullptr) {
+ // Retrieve the start_vertex_labels
+ start_vertex_labels = cugraph::detail::convert_starting_vertex_label_offsets_to_labels(
+ handle_,
+ raft::device_span{start_vertex_offsets_->as_type(),
+ start_vertex_offsets_->size_});
+
+ // Get the number of labels on each GPU
+
+ if constexpr (multi_gpu) {
+ auto num_local_labels = start_vertex_offsets_->size_ - 1;
+
+ auto global_labels = cugraph::host_scalar_allgather(
+ handle_.get_comms(), num_local_labels, handle_.get_stream());
+
+ std::exclusive_scan(
+ global_labels.begin(), global_labels.end(), global_labels.begin(), label_t{0});
+
+ // Compute the global start_vertex_label_offsets
+
+ cugraph::detail::transform_increment_ints(
+ raft::device_span{(*start_vertex_labels).data(),
+ (*start_vertex_labels).size()},
+ (label_t)global_labels[handle_.get_comms().get_rank()],
+ handle_.get_stream());
+
+ rmm::device_uvector unique_labels((*start_vertex_labels).size(),
+ handle_.get_stream());
+ raft::copy(unique_labels.data(),
+ (*start_vertex_labels).data(),
+ unique_labels.size(),
+ handle_.get_stream());
+
+ // Get unique labels
+ // sort the start_vertex_labels
+ cugraph::detail::sort_ints(
+ handle_.get_stream(),
+ raft::device_span{unique_labels.data(), unique_labels.size()});
+
+ auto num_unique_labels = cugraph::detail::unique_ints(
+ handle_.get_stream(),
+ raft::device_span{unique_labels.data(), unique_labels.size()});
+
+ (*local_label_to_comm_rank).resize(num_unique_labels, handle_.get_stream());
+
+ cugraph::detail::scalar_fill(
+ handle_.get_stream(),
+ (*local_label_to_comm_rank).begin(), // This should be renamed to rank
+ (*local_label_to_comm_rank).size(),
+ label_t{handle_.get_comms().get_rank()});
+
+ // Perform allgather to get global_label_to_comm_rank_d_vector
+ auto recvcounts = cugraph::host_scalar_allgather(
+ handle_.get_comms(), num_unique_labels, handle_.get_stream());
+
+ std::vector displacements(recvcounts.size());
+ std::exclusive_scan(
+ recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0});
+
+ (*label_to_comm_rank)
+ .resize(displacements.back() + recvcounts.back(), handle_.get_stream());
+
+ cugraph::device_allgatherv(handle_.get_comms(),
+ (*local_label_to_comm_rank).begin(),
+ (*label_to_comm_rank).begin(),
+ recvcounts,
+ displacements,
+ handle_.get_stream());
+
+ std::tie(start_vertices, *start_vertex_labels) =
+ cugraph::detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning(
+ handle_, std::move(start_vertices), std::move(*start_vertex_labels));
+ }
+ } else {
+ if constexpr (multi_gpu) {
+ start_vertices =
+ cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning(
+ handle_, std::move(start_vertices));
+ }
+ }
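The multi-GPU branch above makes locally numbered labels globally unique by adding a per-rank base obtained from an allgather of label counts followed by an exclusive scan; the host-side arithmetic reduces to the following sketch (counts are made up):

```cpp
#include <cstdint>
#include <numeric>
#include <vector>

// Sketch: per-rank label counts gathered from all ranks -> starting (base) label
// ID for each rank, so locally numbered labels become globally unique.
std::vector<int32_t> label_bases(std::vector<int32_t> const& per_rank_label_counts)
{
  std::vector<int32_t> bases(per_rank_label_counts.size());
  std::exclusive_scan(per_rank_label_counts.begin(), per_rank_label_counts.end(),
                      bases.begin(), int32_t{0});
  return bases;  // e.g. counts {3, 2, 4} -> bases {0, 3, 5}
}
```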
+ //
+ // Need to renumber start_vertices
+ //
+ cugraph::renumber_local_ext_vertices(
+ handle_,
+ start_vertices.data(),
+ start_vertices.size(),
+ number_map->data(),
+ graph_view.local_vertex_partition_range_first(),
+ graph_view.local_vertex_partition_range_last(),
+ do_expensive_check_);
+
+ rmm::device_uvector src(0, handle_.get_stream());
+ rmm::device_uvector dst(0, handle_.get_stream());
+ std::optional> wgt{std::nullopt};
+ std::optional> edge_id{std::nullopt};
+ std::optional> edge_type{std::nullopt};
+ std::optional> hop{std::nullopt};
+ std::optional> edge_label{std::nullopt};
+ std::optional> offsets{std::nullopt};
+
+ // FIXME: For biased sampling, the user should pass either biases or edge weights,
+ // otherwise throw an error and suggest the user call uniform neighbor sample instead
+
+ if (num_edge_types_ > 1) {
+ // call heterogeneous neighbor sample
+ if (is_biased_) {
+ std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) =
+ cugraph::heterogeneous_biased_neighbor_sample(
+ handle_,
+ rng_state_->rng_state_,
+ graph_view,
+ (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt,
+ (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt,
+ (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt,
+ (edge_biases != nullptr) ? *edge_biases : edge_weights->view(),
+ raft::device_span{start_vertices.data(), start_vertices.size()},
+ (start_vertex_offsets_ != nullptr)
+ ? std::make_optional>((*start_vertex_labels).data(),
+ (*start_vertex_labels).size())
+ : std::nullopt,
+ label_to_comm_rank ? std::make_optional(raft::device_span{
+ (*label_to_comm_rank).data(), (*label_to_comm_rank).size()})
+ : std::nullopt,
+ raft::host_span(fan_out_->as_type(), fan_out_->size_),
+ num_edge_types_,
+ cugraph::sampling_flags_t{options_.prior_sources_behavior_,
+ options_.return_hops_,
+ options_.dedupe_sources_,
+ options_.with_replacement_},
+ do_expensive_check_);
+ } else {
+ std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) =
+ cugraph::heterogeneous_uniform_neighbor_sample(
+ handle_,
+ rng_state_->rng_state_,
+ graph_view,
+ (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt,
+ (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt,
+ (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt,
+ raft::device_span{start_vertices.data(), start_vertices.size()},
+ (start_vertex_offsets_ != nullptr)
+ ? std::make_optional>((*start_vertex_labels).data(),
+ (*start_vertex_labels).size())
+ : std::nullopt,
+ label_to_comm_rank ? std::make_optional(raft::device_span{
+ (*label_to_comm_rank).data(), (*label_to_comm_rank).size()})
+ : std::nullopt,
+ raft::host_span(fan_out_->as_type(), fan_out_->size_),
+ num_edge_types_,
+ cugraph::sampling_flags_t{options_.prior_sources_behavior_,
+ options_.return_hops_,
+ options_.dedupe_sources_,
+ options_.with_replacement_},
+ do_expensive_check_);
+ }
+ } else {
+ // Call homogeneous neighbor sample
+ if (is_biased_) {
+ std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) =
+ cugraph::homogeneous_biased_neighbor_sample(
+ handle_,
+ rng_state_->rng_state_,
+ graph_view,
+ (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt,
+ (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt,
+ (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt,
+ (edge_biases != nullptr) ? *edge_biases : edge_weights->view(),
+ raft::device_span{start_vertices.data(), start_vertices.size()},
+ (start_vertex_offsets_ != nullptr)
+ ? std::make_optional>((*start_vertex_labels).data(),
+ (*start_vertex_labels).size())
+ : std::nullopt,
+ label_to_comm_rank ? std::make_optional(raft::device_span{
+ (*label_to_comm_rank).data(), (*label_to_comm_rank).size()})
+ : std::nullopt,
+ raft::host_span(fan_out_->as_type(), fan_out_->size_),
+ cugraph::sampling_flags_t{options_.prior_sources_behavior_,
+ options_.return_hops_,
+ options_.dedupe_sources_,
+ options_.with_replacement_},
+ do_expensive_check_);
+ } else {
+ std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) =
+ cugraph::homogeneous_uniform_neighbor_sample(
+ handle_,
+ rng_state_->rng_state_,
+ graph_view,
+ (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt,
+ (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt,
+ (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt,
+ raft::device_span{start_vertices.data(), start_vertices.size()},
+ (start_vertex_offsets_ != nullptr)
+ ? std::make_optional>((*start_vertex_labels).data(),
+ (*start_vertex_labels).size())
+ : std::nullopt,
+ label_to_comm_rank ? std::make_optional(raft::device_span{
+ (*label_to_comm_rank).data(), (*label_to_comm_rank).size()})
+ : std::nullopt,
+ raft::host_span(fan_out_->as_type(), fan_out_->size_),
+ cugraph::sampling_flags_t{options_.prior_sources_behavior_,
+ options_.return_hops_,
+ options_.dedupe_sources_,
+ options_.with_replacement_},
+ do_expensive_check_);
+ }
+ }
+
+ std::vector vertex_partition_lasts = graph_view.vertex_partition_range_lasts();
+
+ cugraph::unrenumber_int_vertices(handle_,
+ src.data(),
+ src.size(),
+ number_map->data(),
+ vertex_partition_lasts,
+ do_expensive_check_);
+
+ cugraph::unrenumber_int_vertices(handle_,
+ dst.data(),
+ dst.size(),
+ number_map->data(),
+ vertex_partition_lasts,
+ do_expensive_check_);
+
+ std::optional> majors{std::nullopt};
+ rmm::device_uvector minors(0, handle_.get_stream());
+ std::optional> major_offsets{std::nullopt};
+
+ std::optional> label_hop_offsets{std::nullopt};
+
+ std::optional> renumber_map{std::nullopt};
+ std::optional> renumber_map_offsets{std::nullopt};
+
+ bool src_is_major = (options_.compression_type_ == cugraph_compression_type_t::CSR) ||
+ (options_.compression_type_ == cugraph_compression_type_t::DCSR) ||
+ (options_.compression_type_ == cugraph_compression_type_t::COO);
+
+ // Extract the edge_label from the offsets
+ if (offsets) {
+ edge_label = cugraph::c_api::expand_sparse_offsets(
+ raft::device_span{(*offsets).data(), (*offsets).size()},
+ label_t{0},
+ handle_.get_stream());
+ }
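`expand_sparse_offsets` turns the CSR-style offsets back into one label per sampled edge; a host-side equivalent for reference (not the device implementation):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch: expand offsets {0, 2, 5} into per-element labels {0, 0, 1, 1, 1}.
std::vector<int32_t> expand_offsets(std::vector<std::size_t> const& offsets)
{
  std::vector<int32_t> labels;
  for (std::size_t label = 0; label + 1 < offsets.size(); ++label) {
    labels.insert(labels.end(), offsets[label + 1] - offsets[label], static_cast<int32_t>(label));
  }
  return labels;
}
```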
+
+ if (options_.renumber_results_) {
+ if (num_edge_types_ == 1) { // homogeneous renumbering
+ if (options_.compression_type_ == cugraph_compression_type_t::COO) {
+ // COO
+
+ rmm::device_uvector output_majors(0, handle_.get_stream());
+ rmm::device_uvector output_renumber_map(0, handle_.get_stream());
+ std::tie(output_majors,
+ minors,
+ wgt,
+ edge_id,
+ edge_type,
+ label_hop_offsets,
+ output_renumber_map,
+ renumber_map_offsets) =
+ cugraph::renumber_and_sort_sampled_edgelist(
+ handle_,
+ std::move(src),
+ std::move(dst),
+ std::move(wgt),
+ std::move(edge_id),
+ std::move(edge_type),
+ std::move(hop),
+ options_.retain_seeds_
+ ? std::make_optional(raft::device_span{
+ start_vertices_->as_type(), start_vertices_->size_})
+ : std::nullopt,
+ options_.retain_seeds_
+ ? std::make_optional(raft::device_span{
+ start_vertex_offsets_->as_type(), start_vertex_offsets_->size_})
+ : std::nullopt,
+ offsets ? std::make_optional(
+ raft::device_span{offsets->data(), offsets->size()})
+ : std::nullopt,
+ offsets ? (*offsets).size() - 1 : size_t{1},
+ hop ? fan_out_->size_ : size_t{1},
+ src_is_major,
+ do_expensive_check_);
+
+ majors.emplace(std::move(output_majors));
+ renumber_map.emplace(std::move(output_renumber_map));
+ } else {
+ // (D)CSC, (D)CSR
+
+ bool doubly_compress =
+ (options_.compression_type_ == cugraph_compression_type_t::DCSR) ||
+ (options_.compression_type_ == cugraph_compression_type_t::DCSC);
+
+ rmm::device_uvector output_major_offsets(0, handle_.get_stream());
+ rmm::device_uvector output_renumber_map(0, handle_.get_stream());
+
+ std::tie(majors,
+ output_major_offsets,
+ minors,
+ wgt,
+ edge_id,
+ edge_type,
+ label_hop_offsets,
+ output_renumber_map,
+ renumber_map_offsets) =
+ cugraph::renumber_and_compress_sampled_edgelist(
+ handle_,
+ std::move(src),
+ std::move(dst),
+ std::move(wgt),
+ std::move(edge_id),
+ std::move(edge_type),
+ std::move(hop),
+ options_.retain_seeds_
+ ? std::make_optional(raft::device_span{
+ start_vertices_->as_type(), start_vertices_->size_})
+ : std::nullopt,
+ options_.retain_seeds_
+ ? std::make_optional(raft::device_span{
+ start_vertex_offsets_->as_type(), start_vertex_offsets_->size_})
+ : std::nullopt,
+ offsets ? std::make_optional(
+ raft::device_span{offsets->data(), offsets->size()})
+ : std::nullopt,
+ edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label
+ hop ? fan_out_->size_ : size_t{1},
+ src_is_major,
+ options_.compress_per_hop_,
+ doubly_compress,
+ do_expensive_check_);
+
+ renumber_map.emplace(std::move(output_renumber_map));
+ major_offsets.emplace(std::move(output_major_offsets));
+ }
+
+ // These are now represented by label_hop_offsets
+ hop.reset();
+ offsets.reset();
+
+ } else { // heterogeneous renumbering
+
+ rmm::device_uvector<vertex_t> vertex_type_offsets(
+ graph_view.local_vertex_partition_range_size(), handle_.get_stream());
+
+ cugraph::detail::sequence_fill(handle_.get_stream(),
+ vertex_type_offsets.begin(),
+ vertex_type_offsets.size(),
+ vertex_t{0} // FIXME: Update array
+ );
+
+ rmm::device_uvector<vertex_t> output_majors(0, handle_.get_stream());
+ rmm::device_uvector<vertex_t> output_renumber_map(0, handle_.get_stream());
+
+ // extract the edge_type from label_type_hop_offsets
+ std::optional<rmm::device_uvector<size_t>> label_type_hop_offsets{std::nullopt};
+ std::tie(output_majors,
+ minors,
+ wgt,
+ edge_id,
+ label_type_hop_offsets, // Contains information about the type and hop offsets
+ output_renumber_map,
+ (*renumber_map_offsets),
+ renumbered_and_sorted_edge_id_renumber_map,
+ renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) =
+ cugraph::heterogeneous_renumber_and_sort_sampled_edgelist(
+ handle_,
+ std::move(src),
+ std::move(dst),
+ std::move(wgt),
+ std::move(edge_id),
+ std::move(edge_type),
+ std::move(hop),
+ options_.retain_seeds_
+ ? std::make_optional(raft::device_span<vertex_t const>{
+ start_vertices_->as_type<vertex_t>(), start_vertices_->size_})
+ : std::nullopt,
+ options_.retain_seeds_
+ ? std::make_optional(raft::device_span<size_t const>{
+ start_vertex_offsets_->as_type<size_t>(), start_vertex_offsets_->size_})
+ : std::nullopt,
+ offsets ? std::make_optional(
+ raft::device_span<size_t const>{offsets->data(), offsets->size()})
+ : std::nullopt,
+ raft::device_span<vertex_t const>{vertex_type_offsets.data(),
+ vertex_type_offsets.size()},
+
+ edge_label ? (*offsets).size() - 1 : size_t{1},
+ hop ? fan_out_->size_ : size_t{1},
+ size_t{1},
+ num_edge_types_,
+ src_is_major,
+ do_expensive_check_);
+ if (edge_type) {
+ (*edge_type)
+ .resize(raft::device_span<size_t const>{(*label_type_hop_offsets).data(),
+ (*label_type_hop_offsets).size()}
+ .back() -
+ 1,
+ handle_.get_stream());
+ cugraph::detail::sequence_fill(
+ handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0});
+ }
+
+ majors.emplace(std::move(output_majors));
+ // FIXME: Need to update renumber_map because default values are being passed
+ renumber_map.emplace(std::move(output_renumber_map));
+ }
+
+ } else {
+ if (options_.compression_type_ != cugraph_compression_type_t::COO) {
+ CUGRAPH_FAIL("Can only use COO format if not renumbering");
+ }
+
+ std::tie(src, dst, wgt, edge_id, edge_type, label_hop_offsets) =
+ cugraph::sort_sampled_edgelist(handle_,
+ std::move(src),
+ std::move(dst),
+ std::move(wgt),
+ std::move(edge_id),
+ std::move(edge_type),
+ std::move(hop),
+ offsets
+ ? std::make_optional(raft::device_span<size_t const>{
+ offsets->data(), offsets->size()})
+ : std::nullopt,
+ // derive label size from offset size instead of performing
+ // thrust::unique on edge_label.
+ edge_label ? (*offsets).size() - 1 : size_t{1},
+ hop ? fan_out_->size_ : size_t{1},
+ src_is_major,
+ do_expensive_check_);
+
+ majors.emplace(std::move(src));
+ minors = std::move(dst);
+
+ hop.reset();
+ offsets.reset();
+ }
+
+ result_ = new cugraph::c_api::cugraph_sample_result_t{
+ (major_offsets)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*major_offsets, SIZE_T)
+ : nullptr,
+ (majors)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*majors, graph_->vertex_type_)
+ : nullptr,
+ new cugraph::c_api::cugraph_type_erased_device_array_t(minors, graph_->vertex_type_),
+ (edge_id)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*edge_id, graph_->edge_type_)
+ : nullptr,
+ (edge_type) ? new cugraph::c_api::cugraph_type_erased_device_array_t(
+ *edge_type, graph_->edge_type_id_type_)
+ : nullptr,
+ (wgt) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*wgt, graph_->weight_type_)
+ : nullptr,
+ (hop) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*hop, INT32)
+ : nullptr, // FIXME get rid of this
+ (label_hop_offsets)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T)
+ : nullptr,
+ (edge_label)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32)
+ : nullptr,
+ (renumber_map) ? new cugraph::c_api::cugraph_type_erased_device_array_t(
+ renumber_map.value(), graph_->vertex_type_)
+ : nullptr,
+ (renumber_map_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(
+ renumber_map_offsets.value(), SIZE_T)
+ : nullptr,
+ (renumbered_and_sorted_edge_id_renumber_map)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(
+ renumbered_and_sorted_edge_id_renumber_map.value(), graph_->edge_type_)
+ : nullptr,
+ (renumbered_and_sorted_edge_id_renumber_map_label_type_offsets)
+ ? new cugraph::c_api::cugraph_type_erased_device_array_t(
+ renumbered_and_sorted_edge_id_renumber_map_label_type_offsets.value(), SIZE_T)
+ : nullptr};
}
}
};
@@ -985,6 +1592,26 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_re
internal_pointer->renumber_map_offsets_->view());
}
+extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_renumber_map(
+ const cugraph_sample_result_t* result)
+{
+ auto internal_pointer = reinterpret_cast(result);
+ return internal_pointer->renumber_map_ == nullptr
+ ? NULL
+ : reinterpret_cast(
+ internal_pointer->edge_renumber_map_->view());
+}
+
+extern "C" cugraph_type_erased_device_array_view_t*
+cugraph_sample_result_get_edge_renumber_map_offsets(const cugraph_sample_result_t* result)
+{
+ auto internal_pointer = reinterpret_cast(result);
+ return internal_pointer->renumber_map_ == nullptr
+ ? NULL
+ : reinterpret_cast(
+ internal_pointer->edge_renumber_map_offsets_->view());
+}
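
/* A minimal usage sketch (not part of this patch), showing how a caller might read the new
 * edge-ID renumber map back from a sample result via the two getters just added. The
 * `result` pointer and the standard C API array-view helpers are assumed to be available;
 * the per-edge-type segment layout is inferred from the variable names used in this patch. */
#if 0
static void read_edge_renumber_map(const cugraph_sample_result_t* result)
{
  cugraph_type_erased_device_array_view_t* map =
    cugraph_sample_result_get_edge_renumber_map(result);
  cugraph_type_erased_device_array_view_t* map_offsets =
    cugraph_sample_result_get_edge_renumber_map_offsets(result);
  if (map != NULL) {
    /* map holds the edge-ID renumber map produced by heterogeneous renumbering;
     * map_offsets appears to delimit its per-edge-type segments. */
    size_t n = cugraph_type_erased_device_array_view_size(map);
    (void)n;           /* copy to host or slice per edge type as needed */
    (void)map_offsets;
  }
}
#endif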
+
extern "C" cugraph_error_code_t cugraph_test_uniform_neighborhood_sample_result_create(
const cugraph_resource_handle_t* handle,
const cugraph_type_erased_device_array_view_t* srcs,
@@ -1292,6 +1919,7 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample(
"fan_out should be of type int",
*error);
+ // Deprecated functor
uniform_neighbor_sampling_functor functor{handle,
graph,
start_vertices,
@@ -1369,6 +1997,7 @@ cugraph_error_code_t cugraph_biased_neighbor_sample(
"fan_out should be of type int",
*error);
+ // Deprecated functor
biased_neighbor_sampling_functor functor{handle,
graph,
edge_biases,
@@ -1383,3 +2012,249 @@ cugraph_error_code_t cugraph_biased_neighbor_sample(
do_expensive_check};
return cugraph::c_api::run_algorithm(graph, functor, result, error);
}
+
+cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* start_vertex_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ int num_edge_types,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error)
+{
+ auto options_cpp = *reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t const*>(options);
+
+ // FIXME: Should we maintain this condition?
+ CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr),
+ CUGRAPH_INVALID_INPUT,
+ "must specify start_vertex_offsets if retain_seeds is true",
+ *error);
+
+ CAPI_EXPECTS((start_vertex_offsets == nullptr) ||
+ (reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertex_offsets)
+ ->type_ == SIZE_T),
+ CUGRAPH_INVALID_INPUT,
+ "start_vertex_offsets should be of type size_t",
+ *error);
+
+ CAPI_EXPECTS(
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_host_array_view_t const*>(fan_out)
+ ->type_ == INT32,
+ CUGRAPH_INVALID_INPUT,
+ "fan_out should be of type int",
+ *error);
+
+ CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertices)
+ ->type_,
+ CUGRAPH_INVALID_INPUT,
+ "vertex type of graph and start_vertices must match",
+ *error);
+
+ neighbor_sampling_functor functor{handle,
+ rng_state,
+ graph,
+ nullptr,
+ start_vertices,
+ start_vertex_offsets,
+ fan_out,
+ num_edge_types,
+ std::move(options_cpp),
+ FALSE,
+ do_expensive_check};
+ return cugraph::c_api::run_algorithm(graph, functor, result, error);
+}
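
/* A minimal call sketch (not part of this patch) for the new heterogeneous uniform entry
 * point. `handle`, `rng_state`, `graph`, `start_vertices_view`, and `fan_out_view` are
 * assumed to have been created elsewhere with the existing C API helpers; error handling
 * is abbreviated. */
#if 0
cugraph_sampling_options_t* options = NULL;
cugraph_sample_result_t* result     = NULL;
cugraph_error_t* error              = NULL;

if (cugraph_sampling_options_create(&options, &error) == CUGRAPH_SUCCESS) {
  cugraph_sampling_set_renumber_results(options, TRUE);
  cugraph_error_code_t status =
    cugraph_heterogeneous_uniform_neighbor_sample(handle,
                                                  rng_state,
                                                  graph,
                                                  start_vertices_view,
                                                  NULL /* start_vertex_offsets */,
                                                  fan_out_view,
                                                  2 /* num_edge_types */,
                                                  options,
                                                  FALSE /* do_expensive_check */,
                                                  &result,
                                                  &error);
  (void)status;
}
#endif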
+
+cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_edge_property_view_t* edge_biases,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* start_vertex_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ int num_edge_types,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error)
+{
+ auto options_cpp = *reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t const*>(options);
+
+ CAPI_EXPECTS(
+ (edge_biases != nullptr) ||
+ (reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_weights_ != nullptr),
+ CUGRAPH_INVALID_INPUT,
+ "edge_biases is required if the graph is not weighted",
+ *error);
+
+ // FIXME: Should we maintain this condition?
+ CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr),
+ CUGRAPH_INVALID_INPUT,
+ "must specify start_vertex_offsets if retain_seeds is true",
+ *error);
+
+ CAPI_EXPECTS((start_vertex_offsets == nullptr) ||
+ (reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertex_offsets)
+ ->type_ == SIZE_T),
+ CUGRAPH_INVALID_INPUT,
+ "start_vertex_offsets should be of type size_t",
+ *error);
+
+ CAPI_EXPECTS(
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_host_array_view_t const*>(fan_out)
+ ->type_ == INT32,
+ CUGRAPH_INVALID_INPUT,
+ "fan_out should be of type int",
+ *error);
+
+ CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertices)
+ ->type_,
+ CUGRAPH_INVALID_INPUT,
+ "vertex type of graph and start_vertices must match",
+ *error);
+
+ neighbor_sampling_functor functor{handle,
+ rng_state,
+ graph,
+ edge_biases,
+ start_vertices,
+ start_vertex_offsets,
+ fan_out,
+ num_edge_types,
+ std::move(options_cpp),
+ TRUE,
+ do_expensive_check};
+ return cugraph::c_api::run_algorithm(graph, functor, result, error);
+}
+
+cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* start_vertex_offsets, // RENAME?
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error)
+{
+ auto options_cpp = *reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t const*>(options);
+
+ // FIXME: Should we maintain this condition?
+ CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr),
+ CUGRAPH_INVALID_INPUT,
+ "must specify start_vertex_offsets if retain_seeds is true",
+ *error);
+
+ CAPI_EXPECTS((start_vertex_offsets == nullptr) ||
+ (reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertex_offsets)
+ ->type_ == SIZE_T),
+ CUGRAPH_INVALID_INPUT,
+ "start_vertex_offsets should be of type size_t",
+ *error);
+
+ CAPI_EXPECTS(
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_host_array_view_t const*>(fan_out)
+ ->type_ == INT32,
+ CUGRAPH_INVALID_INPUT,
+ "fan_out type must be INT32",
+ *error);
+
+ CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertices)
+ ->type_,
+ CUGRAPH_INVALID_INPUT,
+ "vertex type of graph and start_vertices must match",
+ *error);
+
+ neighbor_sampling_functor functor{handle,
+ rng_state,
+ graph,
+ nullptr,
+ start_vertices,
+ start_vertex_offsets,
+ fan_out,
+ 1, // num_edge_types
+ std::move(options_cpp),
+ FALSE,
+ do_expensive_check};
+ return cugraph::c_api::run_algorithm(graph, functor, result, error);
+}
+
+cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample(
+ const cugraph_resource_handle_t* handle,
+ cugraph_rng_state_t* rng_state,
+ cugraph_graph_t* graph,
+ const cugraph_edge_property_view_t* edge_biases,
+ const cugraph_type_erased_device_array_view_t* start_vertices,
+ const cugraph_type_erased_device_array_view_t* start_vertex_offsets,
+ const cugraph_type_erased_host_array_view_t* fan_out,
+ const cugraph_sampling_options_t* options,
+ bool_t do_expensive_check,
+ cugraph_sample_result_t** result,
+ cugraph_error_t** error)
+{
+ auto options_cpp = *reinterpret_cast<cugraph::c_api::cugraph_sampling_options_t const*>(options);
+
+ CAPI_EXPECTS(
+ (edge_biases != nullptr) ||
+ (reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_weights_ != nullptr),
+ CUGRAPH_INVALID_INPUT,
+ "edge_biases is required if the graph is not weighted",
+ *error);
+
+ // FIXME: Should we maintain this condition?
+ CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr),
+ CUGRAPH_INVALID_INPUT,
+ "must specify start_vertex_offsets if retain_seeds is true",
+ *error);
+
+ CAPI_EXPECTS((start_vertex_offsets == nullptr) ||
+ (reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertex_offsets)
+ ->type_ == SIZE_T),
+ CUGRAPH_INVALID_INPUT,
+ "start_vertex_offsets should be of type size_t",
+ *error);
+
+ CAPI_EXPECTS(
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_host_array_view_t const*>(fan_out)
+ ->type_ == INT32,
+ CUGRAPH_INVALID_INPUT,
+ "fan_out type must be INT32",
+ *error);
+
+ CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
+ reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
+ start_vertices)
+ ->type_,
+ CUGRAPH_INVALID_INPUT,
+ "vertex type of graph and start_vertices must match",
+ *error);
+
+ neighbor_sampling_functor functor{handle,
+ rng_state,
+ graph,
+ edge_biases,
+ start_vertices,
+ start_vertex_offsets,
+ fan_out,
+ 1, // num_edge_types
+ std::move(options_cpp),
+ TRUE,
+ do_expensive_check};
+ return cugraph::c_api::run_algorithm(graph, functor, result, error);
+}
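
/* A minimal call sketch (not part of this patch) for the homogeneous biased entry point.
 * Per the check above, `edge_biases` may be NULL only when the graph carries edge weights,
 * in which case the weights presumably serve as the bias. The `handle`, `rng_state`,
 * `graph`, array views, `options`, `result`, and `error` objects are assumed to exist. */
#if 0
cugraph_error_code_t status =
  cugraph_homogeneous_biased_neighbor_sample(handle,
                                             rng_state,
                                             graph,
                                             NULL /* edge_biases: fall back to edge weights */,
                                             start_vertices_view,
                                             NULL /* start_vertex_offsets */,
                                             fan_out_view,
                                             options,
                                             FALSE /* do_expensive_check */,
                                             &result,
                                             &error);
#endif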
diff --git a/cpp/src/detail/utility_wrappers_32.cu b/cpp/src/detail/utility_wrappers_32.cu
index de407f12493..879a1adf337 100644
--- a/cpp/src/detail/utility_wrappers_32.cu
+++ b/cpp/src/detail/utility_wrappers_32.cu
@@ -63,6 +63,10 @@ template void scalar_fill(raft::handle_t const& handle, size_t* d_value, size_t
template void scalar_fill(raft::handle_t const& handle, float* d_value, size_t size, float value);
+template void sort_ints(raft::handle_t const& handle, raft::device_span<int32_t> values);
+
+template size_t unique_ints(raft::handle_t const& handle, raft::device_span<int32_t> values);
+
template void sequence_fill(rmm::cuda_stream_view const& stream_view,
int32_t* d_value,
size_t size,
@@ -73,6 +77,10 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view,
size_t size,
uint32_t start_value);
+template void transform_increment_ints(raft::device_span<int32_t> values,
+ int32_t value,
+ rmm::cuda_stream_view const& stream_view);
+
template void stride_fill(rmm::cuda_stream_view const& stream_view,
int32_t* d_value,
size_t size,
diff --git a/cpp/src/detail/utility_wrappers_64.cu b/cpp/src/detail/utility_wrappers_64.cu
index 2c136d5902b..742cb18d718 100644
--- a/cpp/src/detail/utility_wrappers_64.cu
+++ b/cpp/src/detail/utility_wrappers_64.cu
@@ -61,6 +61,10 @@ template void scalar_fill(raft::handle_t const& handle,
template void scalar_fill(raft::handle_t const& handle, double* d_value, size_t size, double value);
+template void sort_ints(raft::handle_t const& handle, raft::device_span<int64_t> values);
+
+template size_t unique_ints(raft::handle_t const& handle, raft::device_span<int64_t> values);
+
template void sequence_fill(rmm::cuda_stream_view const& stream_view,
int64_t* d_value,
size_t size,
@@ -71,6 +75,10 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view,
size_t size,
uint64_t start_value);
+template void transform_increment_ints(raft::device_span<int64_t> values,
+ int64_t value,
+ rmm::cuda_stream_view const& stream_view);
+
template void stride_fill(rmm::cuda_stream_view const& stream_view,
int64_t* d_value,
size_t size,
diff --git a/cpp/src/detail/utility_wrappers_impl.cuh b/cpp/src/detail/utility_wrappers_impl.cuh
index 074d7044261..93bd14c4d06 100644
--- a/cpp/src/detail/utility_wrappers_impl.cuh
+++ b/cpp/src/detail/utility_wrappers_impl.cuh
@@ -36,6 +36,7 @@
#include
#include
#include
+#include
namespace cugraph {
namespace detail {
@@ -63,6 +64,20 @@ void scalar_fill(raft::handle_t const& handle, value_t* d_value, size_t size, va
thrust::fill_n(handle.get_thrust_policy(), d_value, size, value);
}
+template <typename value_t>
+void sort_ints(raft::handle_t const& handle, raft::device_span<value_t> values)
+{
+ thrust::sort(handle.get_thrust_policy(), values.begin(), values.end());
+}
+
+template <typename value_t>
+size_t unique_ints(raft::handle_t const& handle, raft::device_span<value_t> values)
+{
+ auto unique_element_last =
+ thrust::unique(handle.get_thrust_policy(), values.begin(), values.end());
+ return thrust::distance(values.begin(), unique_element_last);
+}
+
template <typename value_t>
void sequence_fill(rmm::cuda_stream_view const& stream_view,
value_t* d_value,
@@ -72,6 +87,20 @@ void sequence_fill(rmm::cuda_stream_view const& stream_view,
thrust::sequence(rmm::exec_policy(stream_view), d_value, d_value + size, start_value);
}
+template <typename value_t>
+void transform_increment_ints(raft::device_span<value_t> values,
+ value_t incr,
+ rmm::cuda_stream_view const& stream_view)
+{
+ thrust::transform(rmm::exec_policy(stream_view),
+ values.begin(),
+ values.end(),
+ values.begin(),
+ cuda::proclaim_return_type<value_t>([incr] __device__(value_t value) {
+ return static_cast<value_t>(value + incr);
+ }));
+}
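
// A minimal sketch (not part of this patch) of how the three new helpers compose, e.g. to
// deduplicate a device array of IDs and shift them into a new range. The handle and the
// input vector are assumed to be supplied by the caller; the helper name is illustrative.
#if 0
size_t dedup_and_shift(raft::handle_t const& handle,
                       rmm::device_uvector<int32_t>& ids,
                       int32_t base_offset)
{
  // sort in place, drop duplicates, then add a constant offset to every remaining ID
  sort_ints(handle, raft::device_span<int32_t>{ids.data(), ids.size()});
  size_t n_unique = unique_ints(handle, raft::device_span<int32_t>{ids.data(), ids.size()});
  ids.resize(n_unique, handle.get_stream());
  transform_increment_ints(
    raft::device_span<int32_t>{ids.data(), ids.size()}, base_offset, handle.get_stream());
  return n_unique;
}
#endif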
+
template <typename value_t>
void stride_fill(rmm::cuda_stream_view const& stream_view,
value_t* d_value,
diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh
index b39895129dc..00f73b5c263 100644
--- a/cpp/src/link_prediction/similarity_impl.cuh
+++ b/cpp/src/link_prediction/similarity_impl.cuh
@@ -287,10 +287,8 @@ all_pairs_similarity(raft::handle_t const& handle,
// computing/updating topk with each batch
// FIXME: Experiment with this and adjust as necessary
- // size_t const
- // MAX_PAIRS_PER_BATCH{static_cast(handle.get_device_properties().multiProcessorCount) *
- // (1 << 15)};
- size_t const MAX_PAIRS_PER_BATCH{100};
+ size_t const MAX_PAIRS_PER_BATCH{
+ static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * (1 << 15)};
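
// For scale (not part of this patch): on a GPU with 108 SMs, an assumed example count, this
// evaluates to 108 * 32768, roughly 3.5 million vertex pairs per batch, replacing the
// hard-coded batch size of 100 removed above.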
rmm::device_uvector degrees = graph_view.compute_out_degrees(handle);
rmm::device_uvector two_hop_degrees(degrees.size() + 1, handle.get_stream());
@@ -362,195 +360,205 @@ all_pairs_similarity(raft::handle_t const& handle,
1,
handle.get_stream());
+ handle.sync_stream();
+
std::tie(batch_offsets, std::ignore) = compute_offset_aligned_element_chunks(
handle,
raft::device_span{two_hop_degree_offsets.data(), two_hop_degree_offsets.size()},
sum_two_hop_degrees,
MAX_PAIRS_PER_BATCH);
- for (size_t batch_number = 0; batch_number < (batch_offsets.size() - 1); ++batch_number) {
- if (batch_offsets[batch_number + 1] > batch_offsets[batch_number]) {
- auto [offsets, v2] =
- k_hop_nbrs(handle,
- graph_view,
- raft::device_span{
- tmp_vertices.data() + batch_offsets[batch_number],
- batch_offsets[batch_number + 1] - batch_offsets[batch_number]},
- 2,
- do_expensive_check);
-
- auto v1 = cugraph::detail::expand_sparse_offsets(
- raft::device_span