From 4748ca1a940dfa9a59ba821514a1df8b132be3cf Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 16 Jan 2024 09:39:31 -0600 Subject: [PATCH] nx-cugraph: PLC now handles isolated nodes; clean up our workarounds (#4092) Hooray for removing and cleaning code! Tests also added (we already tested isolated nodes for Louvain). nx-cugraph was updated to handle isolated nodes by passing the node set to PLC in #4077 Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4092 --- python/nx-cugraph/_nx_cugraph/__init__.py | 10 +-- python/nx-cugraph/nx_cugraph/_version.py | 6 +- .../algorithms/community/louvain.py | 12 +--- .../algorithms/components/connected.py | 64 +++++++------------ .../nx-cugraph/nx_cugraph/tests/__init__.py | 3 +- .../nx_cugraph/tests/test_connected.py | 30 +++++++++ 6 files changed, 61 insertions(+), 64 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_connected.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index d02c9c3e940..4e869c76b7a 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -107,21 +107,21 @@ }, "extra_docstrings": { # BEGIN: extra_docstrings - "betweenness_centrality": "`weight` parameter is not yet supported.", + "betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "bfs_edges": "`sort_neighbors` parameter is not yet supported.", "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.", "bfs_successors": "`sort_neighbors` parameter is not yet supported.", "bfs_tree": "`sort_neighbors` parameter is not yet supported.", - "edge_betweenness_centrality": "`weight` parameter is not yet supported.", + "edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", - "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", + "from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.", "generic_bfs_edges": "`neighbors` and `sort_neighbors` parameters are not yet supported.", "k_truss": ( "Currently raises `NotImplementedError` for graphs with more than one connected\n" "component when k >= 3. We expect to fix this soon." ), "katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.", - "louvain_communities": "`seed` parameter is currently ignored.", + "louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.", "pagerank": "`dangling` parameter is not supported, but it is checked for validity.", # END: extra_docstrings }, diff --git a/python/nx-cugraph/nx_cugraph/_version.py b/python/nx-cugraph/nx_cugraph/_version.py index 868a2e19475..a528a3bfe1b 100644 --- a/python/nx-cugraph/nx_cugraph/_version.py +++ b/python/nx-cugraph/nx_cugraph/_version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,12 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# - import importlib.resources # Read VERSION file from the module that is symlinked to VERSION file -# in the root of the repo at build time or copied to the moudle at +# in the root of the repo at build time or copied to the module at # installation. VERSION is a separate file that allows CI build-time scripts # to update version info (including commit hashes) without modifying # source files. diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index d023bab1a47..413ff9ca5e3 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -23,8 +23,6 @@ not_implemented_for, ) -from ..isolate import _isolates - __all__ = ["louvain_communities"] @@ -56,7 +54,6 @@ def louvain_communities( seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility G = _to_undirected_graph(G, weight) if G.src_indices.size == 0: - # TODO: PLC doesn't handle empty graphs gracefully! return [{key} for key in G._nodeiter_to_iter(range(len(G)))] if max_level is None: max_level = 500 @@ -76,14 +73,7 @@ def louvain_communities( do_expensive_check=False, ) groups = _groupby(clusters, node_ids, groups_are_canonical=True) - rv = [set(G._nodearray_to_list(ids)) for ids in groups.values()] - # TODO: PLC doesn't handle isolated node_ids yet, so this is a temporary fix - isolates = _isolates(G) - if isolates.size > 0: - isolates = isolates[isolates > node_ids.max()] - if isolates.size > 0: - rv.extend({node} for node in G._nodearray_to_list(isolates)) - return rv + return [set(G._nodearray_to_list(ids)) for ids in groups.values()] @louvain_communities._can_run diff --git a/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py b/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py index cb12aed1d39..95cc907a82b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py @@ -10,8 +10,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import itertools - import cupy as cp import networkx as nx import pylibcugraph as plc @@ -19,8 +17,6 @@ from nx_cugraph.convert import _to_undirected_graph from nx_cugraph.utils import _groupby, networkx_algorithm, not_implemented_for -from ..isolate import _isolates - __all__ = [ "number_connected_components", "connected_components", @@ -32,19 +28,17 @@ @not_implemented_for("directed") @networkx_algorithm(plc="weakly_connected_components", version_added="23.12") def number_connected_components(G): - return sum(1 for _ in connected_components(G)) - # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL - # G = _to_undirected_graph(G) - # unused_node_ids, labels = plc.weakly_connected_components( - # resource_handle=plc.ResourceHandle(), - # graph=G._get_plc_graph(), - # offsets=None, - # indices=None, - # weights=None, - # labels=None, - # do_expensive_check=False, - # ) - # return cp.unique(labels).size + G = _to_undirected_graph(G) + unused_node_ids, labels = plc.weakly_connected_components( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + offsets=None, + indices=None, + weights=None, + labels=None, + do_expensive_check=False, + ) + return cp.unique(labels).size @number_connected_components._can_run @@ -61,7 +55,6 @@ def _(G): def connected_components(G): G = _to_undirected_graph(G) if G.src_indices.size == 0: - # TODO: PLC doesn't handle empty graphs (or isolated nodes) gracefully! return [{key} for key in G._nodeiter_to_iter(range(len(G)))] node_ids, labels = plc.weakly_connected_components( resource_handle=plc.ResourceHandle(), @@ -73,16 +66,7 @@ def connected_components(G): do_expensive_check=False, ) groups = _groupby(labels, node_ids) - it = (G._nodearray_to_set(connected_ids) for connected_ids in groups.values()) - # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix - isolates = _isolates(G) - if isolates.size > 0: - isolates = isolates[isolates > node_ids.max()] - if isolates.size > 0: - it = itertools.chain( - it, ({node} for node in G._nodearray_to_list(isolates)) - ) - return it + return (G._nodearray_to_set(connected_ids) for connected_ids in groups.values()) @not_implemented_for("directed") @@ -93,20 +77,16 @@ def is_connected(G): raise nx.NetworkXPointlessConcept( "Connectivity is undefined for the null graph." ) - for community in connected_components(G): - return len(community) == len(G) - raise RuntimeError # pragma: no cover - # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL - # unused_node_ids, labels = plc.weakly_connected_components( - # resource_handle=plc.ResourceHandle(), - # graph=G._get_plc_graph(), - # offsets=None, - # indices=None, - # weights=None, - # labels=None, - # do_expensive_check=False, - # ) - # return labels.size == len(G) and cp.unique(labels).size == 1 + unused_node_ids, labels = plc.weakly_connected_components( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + offsets=None, + indices=None, + weights=None, + labels=None, + do_expensive_check=False, + ) + return bool((labels == labels[0]).all()) @not_implemented_for("directed") diff --git a/python/nx-cugraph/nx_cugraph/tests/__init__.py b/python/nx-cugraph/nx_cugraph/tests/__init__.py index ce94db52fa2..c2002fd3fb9 100644 --- a/python/nx-cugraph/nx_cugraph/tests/__init__.py +++ b/python/nx-cugraph/nx_cugraph/tests/__init__.py @@ -1,5 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/nx-cugraph/nx_cugraph/tests/test_connected.py b/python/nx-cugraph/nx_cugraph/tests/test_connected.py new file mode 100644 index 00000000000..fa9f283abc0 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_connected.py @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx + +import nx_cugraph as nxcg + + +def test_connected_isolated_nodes(): + G = nx.complete_graph(4) + G.add_node(max(G) + 1) + assert nx.is_connected(G) is False + assert nxcg.is_connected(G) is False + assert nx.number_connected_components(G) == 2 + assert nxcg.number_connected_components(G) == 2 + assert sorted(nx.connected_components(G)) == [{0, 1, 2, 3}, {4}] + assert sorted(nxcg.connected_components(G)) == [{0, 1, 2, 3}, {4}] + assert nx.node_connected_component(G, 0) == {0, 1, 2, 3} + assert nxcg.node_connected_component(G, 0) == {0, 1, 2, 3} + assert nx.node_connected_component(G, 4) == {4} + assert nxcg.node_connected_component(G, 4) == {4}