From ba73b02b0ecd556343e0cde85c4988950ecbe8ed Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:57:24 -0600 Subject: [PATCH 1/5] Adds benchmarks for additional nx-cugraph 24.02 algos (#4112) * Adds 23 benchmarks for BFS, connected components, triangles, and other related algos * Adds a shell script to run the algo benchmarks individually for easier comparison to NX * Fixes a bug in `nx_cugraph.generic_bfs_edges()` uncovered by these benchmarks Authors: - Rick Ratzel (https://github.com/rlratzel) - Erik Welch (https://github.com/eriknw) - Brad Rees (https://github.com/BradReesWork) Approvers: - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4112 --- .../nx-cugraph/pytest-based/bench_algos.py | 366 +++++++++++++++++- .../nx-cugraph/pytest-based/run-2402.sh | 46 +++ .../traversal/breadth_first_search.py | 2 +- .../nx-cugraph/nx_cugraph/classes/digraph.py | 12 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 4 +- 5 files changed, 413 insertions(+), 17 deletions(-) create mode 100755 benchmarks/nx-cugraph/pytest-based/run-2402.sh diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index a8ed18a20fc..97eb32e2aaa 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -38,20 +38,27 @@ warmup_rounds = 1 dataset_param_values = [ + # name: karate, nodes: 34, edges: 156 pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]), + # name: netscience, nodes: 1461, edges: 5484 pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]), + # name: email-Eu-core, nodes: 1005, edges: 25571 pytest.param( datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed] ), + # name: cit-Patents, nodes: 3774768, edges: 16518948 pytest.param( datasets.cit_patents, marks=[pytest.mark.medium, 
pytest.mark.directed] ), + # name: hollywood, nodes: 1139905, edges: 57515616 pytest.param( datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected] ), + # name: soc-LiveJournal1, nodes: 4847571, edges: 68993773 pytest.param( datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed] ), + # name: europe_osm, nodes: 50912018, edges: 54054660 pytest.param( datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected] ), @@ -226,12 +233,21 @@ def get_graph_obj_for_benchmark(graph_obj, backend_wrapper): """ G = graph_obj if backend_wrapper.backend_name == "cugraph-preconverted": - G = nxcg.from_networkx(G) + G = nxcg.from_networkx(G, preserve_all_attrs=True) return G +def get_highest_degree_node(graph_obj): + degrees = graph_obj.degree() # list of tuples of (node, degree) + return max(degrees, key=lambda t: t[1])[0] + + ################################################################################ # Benchmarks +def bench_from_networkx(benchmark, graph_obj): + benchmark(nxcg.from_networkx, graph_obj) + + # normalized_param_values = [True, False] # k_param_values = [10, 100] normalized_param_values = [True] @@ -284,7 +300,7 @@ def bench_edge_betweenness_centrality( def bench_louvain_communities(benchmark, graph_obj, backend_wrapper): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) - # The cugraph backend for louvain_communities only supports undirected graphs + # DiGraphs are not supported if G.is_directed(): G = G.to_undirected() result = benchmark.pedantic( @@ -416,10 +432,8 @@ def bench_pagerank(benchmark, graph_obj, backend_wrapper): def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): - # Use the node with the highest degree - degrees = graph_obj.degree() # list of tuples of (node, degree) - node = max(degrees, key=lambda t: t[1])[0] G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) result = benchmark.pedantic( 
target=backend_wrapper(nx.single_source_shortest_path_length), @@ -435,11 +449,8 @@ def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapp def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper): - # Use the node with the highest degree - degrees = graph_obj.degree() # list of tuples of (node, degree) - node = max(degrees, key=lambda t: t[1])[0] G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) - + node = get_highest_degree_node(graph_obj) result = benchmark.pedantic( target=backend_wrapper( nx.single_target_shortest_path_length, exhaust_returned_iterator=True @@ -456,3 +467,340 @@ def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapp # needed for this algo in NX 3.3+ which returns a dict instead of an # iterator. Forcing to a list does not change the benchmark timing. assert type(result) is list + + +def bench_ancestors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.ancestors), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_average_clustering(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.average_clustering), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_generic_bfs_edges(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.generic_bfs_edges, 
exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_edges(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_edges, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_layers(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_layers, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + sources=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_predecessors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_predecessors, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_successors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_successors, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_tree(benchmark, graph_obj, backend_wrapper): + G 
= get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_tree), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # Check that this at least appears to be some kind of NX-like Graph + assert hasattr(result, "has_node") + + +def bench_clustering(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.clustering), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_core_number(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.core_number), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_descendants(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.descendants), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_descendants_at_distance(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.descendants_at_distance), + args=(G,), + kwargs=dict( + source=node, + distance=1, + ), + rounds=rounds, + 
iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_is_bipartite(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_bipartite), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_is_strongly_connected(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_strongly_connected), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_is_weakly_connected(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_weakly_connected), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_number_strongly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.number_strongly_connected_components), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is int + + +def bench_number_weakly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.number_weakly_connected_components), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is int + + +def bench_overall_reciprocity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + 
target=backend_wrapper(nx.overall_reciprocity), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_reciprocity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.reciprocity), + args=(G,), + kwargs=dict( + nodes=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_strongly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper( + nx.strongly_connected_components, exhaust_returned_iterator=True + ), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_transitivity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.transitivity), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_triangles(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.triangles), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_weakly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper( + 
nx.weakly_connected_components, exhaust_returned_iterator=True + ), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list diff --git a/benchmarks/nx-cugraph/pytest-based/run-2402.sh b/benchmarks/nx-cugraph/pytest-based/run-2402.sh new file mode 100755 index 00000000000..44ed0bda43a --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/run-2402.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Runs benchmarks for the 24.02 algos. +# Pass either a or b or both. This is useful for separating batches of runs on different GPUs: +# CUDA_VISIBLE_DEVICES=1 run-2402.sh b + +mkdir -p logs + +# benches="$benches ..." pattern is easy to comment out individual runs +benches= + +while [[ $1 != "" ]]; do + if [[ $1 == "a" ]]; then + benches="$benches bench_ancestors" + benches="$benches bench_average_clustering" + benches="$benches bench_generic_bfs_edges" + benches="$benches bench_bfs_edges" + benches="$benches bench_bfs_layers" + benches="$benches bench_bfs_predecessors" + benches="$benches bench_bfs_successors" + benches="$benches bench_bfs_tree" + benches="$benches bench_clustering" + benches="$benches bench_core_number" + benches="$benches bench_descendants" + elif [[ $1 == "b" ]]; then + benches="$benches bench_descendants_at_distance" + benches="$benches bench_is_bipartite" + benches="$benches bench_is_strongly_connected" + benches="$benches bench_is_weakly_connected" + benches="$benches bench_number_strongly_connected_components" + benches="$benches bench_number_weakly_connected_components" + benches="$benches bench_overall_reciprocity" + benches="$benches bench_reciprocity" + benches="$benches bench_strongly_connected_components" + benches="$benches bench_transitivity" + benches="$benches bench_triangles" + benches="$benches bench_weakly_connected_components" + fi + shift +done + +for bench in $benches; do + pytest -sv -k "soc-livejournal1" "bench_algos.py::$bench" 2>&1 
| tee "logs/${bench}.log" +done diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index ef1c011363a..f5d5e2a995d 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -68,7 +68,7 @@ def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbor raise NotImplementedError( "sort_neighbors argument in generic_bfs_edges is not currently supported" ) - return bfs_edges(source, depth_limit=depth_limit) + return bfs_edges(G, source, depth_limit=depth_limit) @generic_bfs_edges._can_run diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 169815eb067..e5cfb8f6815 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -86,9 +86,9 @@ def to_undirected(self, reciprocal=False, as_view=False): key: val[indices].copy() for key, val in self.edge_masks.items() } else: - src_indices, dst_indices = cp.divmod( - src_dst_indices_new, N, dtype=index_dtype - ) + src_indices, dst_indices = cp.divmod(src_dst_indices_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) else: src_dst_indices_old_T = self.src_indices + N * self.dst_indices.astype( np.int64 @@ -116,9 +116,9 @@ def to_undirected(self, reciprocal=False, as_view=False): src_dst_indices_new = cp.union1d( src_dst_indices_old, src_dst_indices_old_T ) - src_indices, dst_indices = cp.divmod( - src_dst_indices_new, N, dtype=index_dtype - ) + src_indices, dst_indices = cp.divmod(src_dst_indices_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) if self.edge_values: recip_indices = cp.lexsort(cp.vstack((src_indices, dst_indices))) diff --git 
a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index f697668750d..0951ee6b135 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -668,7 +668,9 @@ def _get_plc_graph( raise ValueError( f'symmetrize must be "union" or "intersection"; got "{symmetrize}"' ) - src_indices, dst_indices = cp.divmod(src_dst_new, N, dtype=index_dtype) + src_indices, dst_indices = cp.divmod(src_dst_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) return plc.SGGraph( resource_handle=plc.ResourceHandle(), From 20f7dca65e85b71058b0847288a37a0b3c81a913 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 1 Feb 2024 17:59:58 -0600 Subject: [PATCH 2/5] nx-cugraph: use coverage to ensure all algorithms were run (#4108) Heh, there is probably a "better" way to do this, but this way was fast and easy enough to do, and I hope is "good enough". Authors: - Erik Welch (https://github.com/eriknw) - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/4108 --- ci/test_python.sh | 21 +++++ python/nx-cugraph/lint.yaml | 2 +- .../nx_cugraph/scripts/print_table.py | 2 +- .../nx_cugraph/tests/ensure_algos_covered.py | 84 +++++++++++++++++++ .../nx-cugraph/nx_cugraph/tests/test_bfs.py | 33 ++++++++ python/nx-cugraph/run_nx_tests.sh | 14 +++- 6 files changed, 150 insertions(+), 6 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_bfs.py diff --git a/ci/test_python.sh b/ci/test_python.sh index 2b8a6347066..b070143f076 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -120,12 +120,33 @@ popd rapids-logger "pytest networkx using nx-cugraph backend" pushd python/nx-cugraph +# Use editable install to make 
coverage work +pip install -e . --no-deps ./run_nx_tests.sh # run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0% # in case nx-cugraph failed to load but fallback mode allowed the run to pass. _coverage=$(coverage report|grep "^TOTAL") echo "nx-cugraph coverage from networkx tests: $_coverage" echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }' +# Ensure all algorithms were called by comparing covered lines to function lines. +# Run our tests again (they're fast enough) to add their coverage, then create coverage.json +pytest \ + --pyargs nx_cugraph \ + --config-file=./pyproject.toml \ + --cov-config=./pyproject.toml \ + --cov=nx_cugraph \ + --cov-append \ + --cov-report= +coverage report \ + --include="*/nx_cugraph/algorithms/*" \ + --omit=__init__.py \ + --show-missing \ + --rcfile=./pyproject.toml +coverage json --rcfile=./pyproject.toml +python -m nx_cugraph.tests.ensure_algos_covered +# Exercise (and show results of) scripts that show implemented networkx algorithms +python -m nx_cugraph.scripts.print_tree --dispatch-name --plc --incomplete --different +python -m nx_cugraph.scripts.print_table popd rapids-logger "pytest cugraph-service (single GPU)" diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 5a4773168b6..8e87fc23592 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -26,7 +26,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.15 + rev: v0.16 hooks: - id: validate-pyproject name: Validate pyproject.toml diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_table.py b/python/nx-cugraph/nx_cugraph/scripts/print_table.py index 7e69de63dc1..117a1444f48 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_table.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_table.py @@ -59,7 +59,7 @@ def main(path_to_info=None, *, file=sys.stdout): if path_to_info is None: path_to_info = 
get_path_to_info(version_added_sep=".") lines = ["networkx_path,dispatch_name,version_added,plc,is_incomplete,is_different"] - lines.extend(",".join(info) for info in path_to_info.values()) + lines.extend(",".join(map(str, info)) for info in path_to_info.values()) text = "\n".join(lines) print(text, file=file) return text diff --git a/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py b/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py new file mode 100644 index 00000000000..7047f0eeafd --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py @@ -0,0 +1,84 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Ensure that all functions wrapped by @networkx_algorithm were called. + +This file is run by CI and should not normally be run manually. 
+""" +import inspect +import json +from pathlib import Path + +from nx_cugraph.interface import BackendInterface +from nx_cugraph.utils import networkx_algorithm + +with Path("coverage.json").open() as f: + coverage = json.load(f) + +filenames_to_executed_lines = { + "nx_cugraph/" + + filename.rsplit("nx_cugraph/", 1)[-1]: set(coverage_info["executed_lines"]) + for filename, coverage_info in coverage["files"].items() +} + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def get_func_filename(func): + return "nx_cugraph" + inspect.getfile(unwrap(func)).rsplit("nx_cugraph", 1)[-1] + + +def get_func_linenos(func): + lines, lineno = inspect.getsourcelines(unwrap(func)) + for i, line in enumerate(lines, lineno): + if ":\n" in line: + return set(range(i + 1, lineno + len(lines))) + raise RuntimeError(f"Could not determine line numbers for function {func}") + + +def has_any_coverage(func): + return bool( + filenames_to_executed_lines[get_func_filename(func)] & get_func_linenos(func) + ) + + +def main(): + no_coverage = set() + for attr, func in vars(BackendInterface).items(): + if not isinstance(func, networkx_algorithm): + continue + if not has_any_coverage(func): + no_coverage.add(attr) + if no_coverage: + msg = "The following algorithms have no coverage: " + ", ".join( + sorted(no_coverage) + ) + # Create a border of "!" + msg = ( + "\n\n" + + "!" * (len(msg) + 6) + + "\n!! " + + msg + + " !!\n" + + "!" * (len(msg) + 6) + + "\n" + ) + raise AssertionError(msg) + print("\nSuccess: coverage determined all algorithms were called!\n") + + +if __name__ == "__main__": + main() diff --git a/python/nx-cugraph/nx_cugraph/tests/test_bfs.py b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py new file mode 100644 index 00000000000..c2b22e98949 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest +from packaging.version import parse + +nxver = parse(nx.__version__) + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True) + + +def test_generic_bfs_edges(): + # generic_bfs_edges currently isn't exercised by networkx tests + Gnx = nx.karate_club_graph() + Gcg = nx.karate_club_graph(backend="cugraph") + for depth_limit in (0, 1, 2): + for source in Gnx: + # Some ordering is arbitrary, so I think there's a chance + # this test may fail if networkx or nx-cugraph changes. + nx_result = nx.generic_bfs_edges(Gnx, source, depth_limit=depth_limit) + cg_result = nx.generic_bfs_edges(Gcg, source, depth_limit=depth_limit) + assert sorted(nx_result) == sorted(cg_result), (source, depth_limit) diff --git a/python/nx-cugraph/run_nx_tests.sh b/python/nx-cugraph/run_nx_tests.sh index 07c97cdf947..da7a2014cef 100755 --- a/python/nx-cugraph/run_nx_tests.sh +++ b/python/nx-cugraph/run_nx_tests.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# # NETWORKX_GRAPH_CONVERT=cugraph # Used by networkx versions 3.0 and 3.1 @@ -30,7 +30,13 @@ NETWORKX_TEST_BACKEND=cugraph \ NETWORKX_FALLBACK_TO_NX=True \ pytest \ --pyargs networkx \ - --cov=nx_cugraph.algorithms \ - --cov-report term-missing \ - --no-cov-on-fail \ + --config-file=$(dirname $0)/pyproject.toml \ + --cov-config=$(dirname $0)/pyproject.toml \ + --cov=nx_cugraph \ + --cov-report= \ "$@" +coverage report \ + --include="*/nx_cugraph/algorithms/*" \ + --omit=__init__.py \ + --show-missing \ + --rcfile=$(dirname $0)/pyproject.toml From 3d52f177b3cb8213283544ed3bfe3397be86d20f Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:00:50 -0600 Subject: [PATCH 3/5] Optimize the drop-duplicate functionality (#4095) Our current python API leverages dask to implement the `drop-duplicate` functionality but it carries a lot of overhead as it draws a significant amount of host memory and results into a crash when processing large graphs (4+ billion edges). This PR 1. Leverages the CAPI to internally drop multi edges when creating the PLC graph. 2. Deprecates the parameter `multi` which, when set to False, triggers the dask based `drop-duplicate` functionality 3. 
Add flag `do_expensive_check` to check for `NULL` values in the edgelist Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Vibhu Jawa (https://github.com/VibhuJawa) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4095 --- .../simpleDistributedGraph.py | 17 ++++++++- .../graph_implementation/simpleGraph.py | 17 +++++++-- .../cugraph/cugraph/structure/symmetrize.py | 37 +++++++++++++++++-- .../test_uniform_neighbor_sample_mg.py | 11 +++++- python/pylibcugraph/pylibcugraph/graphs.pyx | 2 +- 5 files changed, 73 insertions(+), 11 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 8fed467bf6d..cdf1e937e67 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -39,6 +39,7 @@ ) from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster import cugraph.dask.comms.comms as Comms +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates class simpleDistributedGraphImpl: @@ -95,6 +96,7 @@ def _make_plc_graph( weight_type, edge_id_type, edge_type_id, + drop_multi_edges, ): weights = None edge_ids = None @@ -149,6 +151,7 @@ def _make_plc_graph( num_arrays=num_arrays, store_transposed=store_transposed, do_expensive_check=False, + drop_multi_edges=drop_multi_edges, ) del edata_x gc.collect() @@ -267,7 +270,7 @@ def __from_edgelist( input_ddf, source, destination, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) value_col = None @@ -277,7 +280,7 @@ def __from_edgelist( source, destination, value_col_names, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -364,6 +367,7 @@ def __from_edgelist( 
self.weight_type, self.edge_id_type, self.edge_type_id_type, + not self.properties.multi_edge, ) for w, edata in persisted_keys_d.items() } @@ -455,6 +459,15 @@ def view_edge_list(self): else: is_multi_column = True + if not self.properties.multi_edge: + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + _client = default_client() + workers = _client.scheduler_info()["workers"] + edgelist_df = _memory_efficient_drop_duplicates( + edgelist_df, [srcCol, dstCol], len(workers) + ) + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ [srcCol, dstCol] ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 22d82eb1796..121a4c6245a 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=self.properties.multi_edge, + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=self.properties.multi_edge, + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -298,7 +298,10 @@ def __from_edgelist( self._replicate_edgelist() self._make_plc_graph( - value_col=value_col, store_transposed=store_transposed, renumber=renumber + value_col=value_col, + store_transposed=store_transposed, + renumber=renumber, + drop_multi_edges=not self.properties.multi_edge, ) def to_pandas_edgelist( @@ -477,6 +480,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + elif not use_initial_input_df and self.properties.renumbered: # Do not unrenumber the vertices if the initial input df was used if not self.properties.directed: @@ -484,6 +488,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) @@ -1084,6 +1089,7 @@ def _make_plc_graph( value_col: Dict[str, cudf.DataFrame] = None, store_transposed: bool = False, renumber: bool = True, + drop_multi_edges: bool = False, ): """ Parameters @@ -1100,6 +1106,8 @@ def _make_plc_graph( Whether to renumber the vertices of the graph. Required if inputted vertex ids are not of int32 or int64 type. 
+ drop_multi_edges: bool (default=False) + Whether to drop multi edges """ if value_col is None: @@ -1163,6 +1171,7 @@ def _make_plc_graph( renumber=renumber, do_expensive_check=True, input_array_format=input_array_format, + drop_multi_edges=drop_multi_edges, ) def to_directed(self, DiG, store_transposed=False): diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index b324ff65834..30c6394ade9 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,7 @@ import cudf import dask_cudf from dask.distributed import default_client +import warnings def symmetrize_df( @@ -54,6 +55,11 @@ def symmetrize_df( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `cugraph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -84,6 +90,12 @@ def symmetrize_df( if multi: return result else: + warnings.warn( + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. 
Multi edges will be removed upon creation " + "of graph instance.", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = result.groupby(by=[*vertex_col_name], as_index=False).min() return result @@ -128,6 +140,11 @@ def symmetrize_ddf( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `cugraph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -165,8 +182,15 @@ def symmetrize_ddf( else: result = ddf if multi: + result = result.reset_index(drop=True).repartition(npartitions=len(workers) * 2) return result else: + warnings.warn( + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. Multi edges will be removed upon creation " + "of graph instance.", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = _memory_efficient_drop_duplicates( result, vertex_col_name, len(workers) @@ -181,6 +205,7 @@ def symmetrize( value_col_name=None, multi=False, symmetrize=True, + do_expensive_check=False, ): """ Take a dataframe of source destination pairs along with associated @@ -208,6 +233,11 @@ def symmetrize( weights column name. multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `cugraph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. 
@@ -234,8 +264,9 @@ def symmetrize( if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") - csg.null_check(input_df[source_col_name]) - csg.null_check(input_df[dest_col_name]) + if do_expensive_check: # FIXME: Optimize this check as it is currently expensive + csg.null_check(input_df[source_col_name]) + csg.null_check(input_df[dest_col_name]) if isinstance(input_df, dask_cudf.DataFrame): output_df = symmetrize_ddf( diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 460a25cbd14..371410b8bd5 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,6 +26,7 @@ from cugraph.testing import UNDIRECTED_DATASETS from cugraph.dask import uniform_neighbor_sample from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates from cugraph.datasets import email_Eu_core, small_tree from pylibcugraph.testing.utils import gen_fixture_params_product @@ -135,6 +136,14 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): dg = input_combo["MGGraph"] input_df = dg.input_df + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. 
+ vertex_col_name = ["src", "dst"] + workers = dask_client.scheduler_info()["workers"] + input_df = _memory_efficient_drop_duplicates( + input_df, vertex_col_name, len(workers) + ) + result_nbr = uniform_neighbor_sample( dg, input_combo["start_list"], diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index 76ad7690840..def47390ce5 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -463,9 +463,9 @@ cdef class MGGraph(_GPUGraph): edge_type_view_ptr_ptr, store_transposed, num_arrays, - do_expensive_check, drop_self_loops, drop_multi_edges, + do_expensive_check, &(self.c_graph_ptr), &error_ptr) From acb3add62e67d8357649352e81e3179416ee81ff Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:46:22 -0600 Subject: [PATCH 4/5] Removes the `networkx_algorithm` decorator to all SCC functions to disable dispatching to them (#4120) The current cugraph `strongly_connected_components` is a legacy implementation with known issues, and in most cases should not be used until the cugraph team can provide an update. This PR removes the `networkx_algorithm` decorator from all SCC functions to disable dispatching. Users can still run the SCC functions here by accessing them directly from `nx_cugraph`: ```python >>> import nx_cugraph as nxcg >>> nxcg.strongly_connected_components(...) 
``` Tested by running the `nx_cugraph` tests (`pytest nx_cugraph/tests`) and the NetworkX tests (`run_nx_tests.sh`) _Note: using the "non-breaking" label since this API was only present in nightlies and never released._ Authors: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) Approvers: - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4120 --- python/nx-cugraph/_nx_cugraph/__init__.py | 9 +++---- .../components/strongly_connected.py | 24 ++++++++++++------- python/nx-cugraph/nx_cugraph/interface.py | 12 ++++++---- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 2f283aa153c..8b5c87a63f9 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -12,7 +12,11 @@ # limitations under the License. """Tell NetworkX about the cugraph backend. This file can update itself: -$ make plugin-info # Recommended method for development +$ make plugin-info + +or + +$ make all # Recommended - runs 'plugin-info' followed by 'lint' or @@ -78,7 +82,6 @@ "is_connected", "is_forest", "is_isolate", - "is_strongly_connected", "is_tree", "is_weakly_connected", "isolates", @@ -96,7 +99,6 @@ "number_connected_components", "number_of_isolates", "number_of_selfloops", - "number_strongly_connected_components", "number_weakly_connected_components", "octahedral_graph", "out_degree_centrality", @@ -111,7 +113,6 @@ "single_source_shortest_path_length", "single_target_shortest_path_length", "star_graph", - "strongly_connected_components", "tadpole_graph", "tetrahedral_graph", "transitivity", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py b/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py index d1713129703..a63b3237dfc 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py +++ 
b/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py @@ -15,12 +15,7 @@ import pylibcugraph as plc from nx_cugraph.convert import _to_directed_graph -from nx_cugraph.utils import ( - _groupby, - index_dtype, - networkx_algorithm, - not_implemented_for, -) +from nx_cugraph.utils import _groupby, index_dtype, not_implemented_for __all__ = [ "number_strongly_connected_components", @@ -50,8 +45,19 @@ def _strongly_connected_components(G): return labels +# The networkx_algorithm decorator is (temporarily) removed to disable +# dispatching for this function. The current cugraph +# strongly_connected_components is a legacy implementation with known issues, +# and in most cases should not be used until the cugraph team can provide an +# update. +# +# Users can still call this via the nx_cugraph module directly: +# >>> import nx_cugraph as nxcg +# >>> nxcg.strongly_connected_components(...) + + @not_implemented_for("undirected") -@networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") +# @networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") def strongly_connected_components(G): G = _to_directed_graph(G) if G.src_indices.size == 0: @@ -62,7 +68,7 @@ def strongly_connected_components(G): @not_implemented_for("undirected") -@networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") +# @networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") def number_strongly_connected_components(G): G = _to_directed_graph(G) if G.src_indices.size == 0: @@ -72,7 +78,7 @@ def number_strongly_connected_components(G): @not_implemented_for("undirected") -@networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") +# @networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") def is_strongly_connected(G): G = _to_directed_graph(G) if len(G) == 0: diff --git a/python/nx-cugraph/nx_cugraph/interface.py 
b/python/nx-cugraph/nx_cugraph/interface.py index a57074aabb0..46ea5831b0b 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -69,10 +69,14 @@ def key(testpath): no_string_dtype = "string edge values not currently supported" xfail = { - key( - "test_strongly_connected.py:" - "TestStronglyConnected.test_condensation_mapping_and_members" - ): "Strongly connected groups in different iteration order", + # This is removed while strongly_connected_components() is not + # dispatchable. See algorithms/components/strongly_connected.py for + # details. + # + # key( + # "test_strongly_connected.py:" + # "TestStronglyConnected.test_condensation_mapping_and_members" + # ): "Strongly connected groups in different iteration order", } from packaging.version import parse From 581d3562496c5c9c0b094f3cfd6f5631154e3739 Mon Sep 17 00:00:00 2001 From: Don Acosta <97529984+acostadon@users.noreply.github.com> Date: Thu, 1 Feb 2024 19:58:16 -0500 Subject: [PATCH 5/5] corrected links in C API and added groups for support functions (#4131) Fixes broken links in C API docs in Traversal, Sampling and Community algorithm sections resolves issue #4116 Authors: - Don Acosta (https://github.com/acostadon) - Brad Rees (https://github.com/BradReesWork) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/4131 --- cpp/include/cugraph_c/community_algorithms.h | 14 ++++--- cpp/include/cugraph_c/sampling_algorithms.h | 37 ++++++++++++++++--- cpp/include/cugraph_c/traversal_algorithms.h | 13 ++++--- .../source/api_docs/cugraph_c/community.rst | 10 +---- .../source/api_docs/cugraph_c/labeling.rst | 4 +- .../source/api_docs/cugraph_c/sampling.rst | 13 +++---- .../source/api_docs/cugraph_c/similarity.rst | 4 +- .../source/api_docs/cugraph_c/traversal.rst | 4 +- 8 files changed, 59 insertions(+), 40 deletions(-) diff --git a/cpp/include/cugraph_c/community_algorithms.h 
b/cpp/include/cugraph_c/community_algorithms.h index feab15c7eeb..e8a71a40162 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #include /** @defgroup community Community algorithms - * @{ */ #ifdef __cplusplus @@ -60,18 +59,21 @@ cugraph_error_code_t cugraph_triangle_count(const cugraph_resource_handle_t* han cugraph_error_t** error); /** + * @ingroup community * @brief Get triangle counting vertices */ cugraph_type_erased_device_array_view_t* cugraph_triangle_count_result_get_vertices( cugraph_triangle_count_result_t* result); /** + * @ingroup community * @brief Get triangle counting counts */ cugraph_type_erased_device_array_view_t* cugraph_triangle_count_result_get_counts( cugraph_triangle_count_result_t* result); /** + * @ingroup community * @brief Free a triangle count result * * @param [in] result The result from a sampling algorithm @@ -147,24 +149,28 @@ cugraph_error_code_t cugraph_leiden(const cugraph_resource_handle_t* handle, cugraph_error_t** error); /** + * @ingroup community * @brief Get hierarchical clustering vertices */ cugraph_type_erased_device_array_view_t* cugraph_hierarchical_clustering_result_get_vertices( cugraph_hierarchical_clustering_result_t* result); /** + * @ingroup community * @brief Get hierarchical clustering clusters */ cugraph_type_erased_device_array_view_t* cugraph_hierarchical_clustering_result_get_clusters( cugraph_hierarchical_clustering_result_t* result); /** + * @ingroup community * @brief Get modularity */ double cugraph_hierarchical_clustering_result_get_modularity( cugraph_hierarchical_clustering_result_t* result); /** + * @ingroup community * @brief Free a hierarchical clustering result * * 
@param [in] result The result from a sampling algorithm @@ -423,7 +429,3 @@ void cugraph_clustering_result_free(cugraph_clustering_result_t* result); #ifdef __cplusplus } #endif - -/** - * @} - */ diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 782bb5a3790..5760d2098aa 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ #include /** @defgroup samplingC Sampling algorithms - * @{ */ #ifdef __cplusplus @@ -134,6 +133,7 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, cugraph_error_t** error); /** + * @ingroup samplingC * @brief Get the max path length from random walk result * * @param [in] result The result from random walks @@ -145,6 +145,7 @@ size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result // difference at the moment is that RW results contain weights // and extract_paths results don't. But that's probably wrong. 
/** + * @ingroup samplingC * @brief Get the matrix (row major order) of vertices in the paths * * @param [in] result The result from a random walk algorithm @@ -154,6 +155,7 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths( cugraph_random_walk_result_t* result); /** + * @ingroup samplingC * @brief Get the matrix (row major order) of edge weights in the paths * * @param [in] result The result from a random walk algorithm @@ -163,6 +165,7 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights( cugraph_random_walk_result_t* result); /** + * @ingroup samplingC * @brief If the random walk result is compressed, get the path sizes * @deprecated This call will no longer be relevant once the new node2vec are called * @@ -173,6 +176,7 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_path_siz cugraph_random_walk_result_t* result); /** + * @ingroup samplingC * @brief Free random walks result * * @param [in] result The result from random walks @@ -220,6 +224,7 @@ typedef enum cugraph_compression_type_t { } cugraph_compression_type_t; /** + * @ingroup samplingC * @brief Create sampling options object * * All sampling options set to FALSE @@ -232,6 +237,7 @@ cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t* cugraph_error_t** error); /** + * @ingroup samplingC * @brief Set flag to renumber results * * @param options - opaque pointer to the sampling options @@ -240,6 +246,7 @@ cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t* void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set whether to compress per-hop (True) or globally (False) * * @param options - opaque pointer to the sampling options @@ -248,6 +255,7 @@ void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, void 
cugraph_sampling_set_compress_per_hop(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set flag to sample with_replacement * * @param options - opaque pointer to the sampling options @@ -256,6 +264,7 @@ void cugraph_sampling_set_compress_per_hop(cugraph_sampling_options_t* options, void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set flag to sample return_hops * * @param options - opaque pointer to the sampling options @@ -264,6 +273,7 @@ void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options, void cugraph_sampling_set_return_hops(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set compression type * * @param options - opaque pointer to the sampling options @@ -273,6 +283,7 @@ void cugraph_sampling_set_compression_type(cugraph_sampling_options_t* options, cugraph_compression_type_t value); /** + * @ingroup samplingC * @brief Set prior sources behavior * * @param options - opaque pointer to the sampling options @@ -282,6 +293,7 @@ void cugraph_sampling_set_prior_sources_behavior(cugraph_sampling_options_t* opt cugraph_prior_sources_behavior_t value); /** + * @ingroup samplingC * @brief Set flag to sample dedupe_sources prior to sampling * * @param options - opaque pointer to the sampling options @@ -290,6 +302,7 @@ void cugraph_sampling_set_prior_sources_behavior(cugraph_sampling_options_t* opt void cugraph_sampling_set_dedupe_sources(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Free sampling options object * * @param [in] options Opaque pointer to sampling object @@ -369,6 +382,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_destinations( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the major vertices from the sampling algorithm result * * @param [in] result The result from a 
sampling algorithm @@ -378,6 +392,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_majors( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the minor vertices from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -387,6 +402,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_minors( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the major offsets from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -396,6 +412,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_major_offsets const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the start labels from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -405,6 +422,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_start_labels( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the edge_id from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -414,6 +432,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_id( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the edge_type from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -423,6 +442,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_type( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the edge_weight from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -432,6 +452,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_weight( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the hop from the sampling algorithm result * * @param [in] result 
The result from a sampling algorithm @@ -441,6 +462,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_hop( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the label-hop offsets from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -450,6 +472,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_off const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the index from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -469,6 +492,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_offsets( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the renumber map * * @param [in] result The result from a sampling algorithm @@ -478,6 +502,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the renumber map offsets * * @param [in] result The result from a sampling algorithm @@ -487,6 +512,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_ const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Free a sampling result * * @param [in] result The result from a sampling algorithm @@ -494,6 +520,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_ void cugraph_sample_result_free(cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Create a sampling result (testing API) * * @param [in] handle Handle for accessing resources @@ -524,6 +551,7 @@ cugraph_error_code_t cugraph_test_sample_result_create( cugraph_error_t** error); /** + * @ingroup samplingC * @brief Create a sampling result (testing API) * * @param [in] handle Handle for accessing resources @@ -554,6 +582,7 @@ cugraph_error_code_t 
cugraph_test_uniform_neighborhood_sample_result_create( cugraph_error_t** error); /** + * @ingroup samplingC * @brief Select random vertices from the graph * * @param [in] handle Handle for accessing resources @@ -576,7 +605,3 @@ cugraph_error_code_t cugraph_select_random_vertices(const cugraph_resource_handl #ifdef __cplusplus } #endif - -/** - * @} - */ diff --git a/cpp/include/cugraph_c/traversal_algorithms.h b/cpp/include/cugraph_c/traversal_algorithms.h index 8959366ac17..e25fa167e43 100644 --- a/cpp/include/cugraph_c/traversal_algorithms.h +++ b/cpp/include/cugraph_c/traversal_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ /** @defgroup traversal Traversal Algorithms * @ingroup c_api - * @{ */ #ifdef __cplusplus @@ -40,6 +39,7 @@ typedef struct { } cugraph_paths_result_t; /** + * @ingroup traversal * @brief Get the vertex ids from the paths result * * @param [in] result The result from bfs or sssp @@ -49,6 +49,7 @@ cugraph_type_erased_device_array_view_t* cugraph_paths_result_get_vertices( cugraph_paths_result_t* result); /** + * @ingroup traversal * @brief Get the distances from the paths result * * @param [in] result The result from bfs or sssp @@ -58,6 +59,7 @@ cugraph_type_erased_device_array_view_t* cugraph_paths_result_get_distances( cugraph_paths_result_t* result); /** + * @ingroup traversal * @brief Get the predecessors from the paths result * * @param [in] result The result from bfs or sssp @@ -69,6 +71,7 @@ cugraph_type_erased_device_array_view_t* cugraph_paths_result_get_predecessors( cugraph_paths_result_t* result); /** + * @ingroup traversal * @brief Free paths result * * @param [in] result The result from bfs or sssp @@ -188,6 +191,7 @@ cugraph_error_code_t cugraph_extract_paths( size_t 
cugraph_extract_paths_result_get_max_path_length(cugraph_extract_paths_result_t* result); /** + * @ingroup traversal * @brief Get the matrix (row major order) of paths * * @param [in] result The result from extract_paths @@ -197,6 +201,7 @@ cugraph_type_erased_device_array_view_t* cugraph_extract_paths_result_get_paths( cugraph_extract_paths_result_t* result); /** + * @ingroup traversal * @brief Free extract_paths result * * @param [in] result The result from extract_paths @@ -206,7 +211,3 @@ void cugraph_extract_paths_result_free(cugraph_extract_paths_result_t* result); #ifdef __cplusplus } #endif - -/** - * @} - */ diff --git a/docs/cugraph/source/api_docs/cugraph_c/community.rst b/docs/cugraph/source/api_docs/cugraph_c/community.rst index 0bbfe365c4d..d55325720c4 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/community.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/community.rst @@ -1,12 +1,6 @@ Community ========= -.. role:: py(code) - :language: c - :class: highlight - -``#include `` - Triangle Counting ----------------- .. doxygenfunction:: cugraph_triangle_count @@ -45,8 +39,8 @@ Spectral Clustering - Modularity Maximization .. doxygenfunction:: cugraph_analyze_clustering_modularity :project: libcugraph -Spectral Clusteriong - Edge Cut -------------------------------- +Spectral Clustering - Edge Cut +------------------------------ .. doxygenfunction:: cugraph_analyze_clustering_edge_cut :project: libcugraph diff --git a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst index af105ee8fc9..4ca598c0a06 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst @@ -12,8 +12,8 @@ Strongly Connected Components .. doxygenfunction:: cugraph_strongly_connected_components :project: libcugraph -Support -------- +Labeling Support Functions +-------------------------- .. 
doxygengroup:: labeling :project: libcugraph :members: diff --git a/docs/cugraph/source/api_docs/cugraph_c/sampling.rst b/docs/cugraph/source/api_docs/cugraph_c/sampling.rst index 21b837daf93..3d5af713c33 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/sampling.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/sampling.rst @@ -7,7 +7,7 @@ Uniform Random Walks :project: libcugraph Biased Random Walks --------------------- +------------------- .. doxygenfunction:: cugraph_biased_random_walks :project: libcugraph @@ -21,16 +21,13 @@ Node2Vec .. doxygenfunction:: cugraph_node2vec :project: libcugraph -Uniform Neighborhood Sampling ------------------------------ -.. doxygenfunction:: cugraph_uniform_neighbor_sample_with_edge_properties - :project: libcugraph - +Uniform Neighbor Sampling +------------------------- .. doxygenfunction:: cugraph_uniform_neighbor_sample :project: libcugraph -Support -------- +Sampling Support Functions +-------------------------- .. doxygengroup:: samplingC :project: libcugraph :members: diff --git a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst index fba07ad206c..200ba695781 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst @@ -17,8 +17,8 @@ Overlap .. doxygenfunction:: cugraph_overlap_coefficients :project: libcugraph -Support -------- +Similarity Support Functions +---------------------------- .. doxygengroup:: similarity :project: libcugraph :members: diff --git a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst index c90760e9e79..1578951e05f 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst @@ -22,8 +22,8 @@ Extract Max Path Length .. 
doxygenfunction:: cugraph_extract_paths_result_get_max_path_length :project: libcugraph -Support -------- +Traversal Support Functions +--------------------------- .. doxygengroup:: traversal :project: libcugraph :members: