From 0906c8dd1e387c266b42eb8597b3e8a66fd496a6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 18:26:31 +0000 Subject: [PATCH 1/9] Fix renumber map column name to not conflict with a method --- .../cugraph/sampling/sampling_utilities.py | 4 ++-- .../sampling/test_uniform_neighbor_sample.py | 18 +++++++++++------- .../test_uniform_neighbor_sample_mg.py | 14 ++++++++------ 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/python/cugraph/cugraph/sampling/sampling_utilities.py b/python/cugraph/cugraph/sampling/sampling_utilities.py index 50c315129dc..0a853c0136e 100644 --- a/python/cugraph/cugraph/sampling/sampling_utilities.py +++ b/python/cugraph/cugraph/sampling/sampling_utilities.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -74,7 +74,7 @@ def sampling_results_from_cupy_array_dict( if renumber: renumber_df = cudf.DataFrame( { - "map": cupy_array_dict["renumber_map"], + "renumber_map": cupy_array_dict["renumber_map"], } ) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py index 206898088ab..560b80993d9 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -796,7 +796,9 @@ def test_uniform_neighbor_sample_renumber(hops): expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique() assert sorted(expected_renumber_map.values_host.tolist()) == sorted( - renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist() + renumber_map.renumber_map[ + 0 : len(expected_renumber_map) + ].values_host.tolist() ) assert (renumber_map.batch_id == 0).all() @@ -854,7 +856,9 @@ def test_uniform_neighbor_sample_offset_renumber(hops): expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique() assert sorted(expected_renumber_map.values_host.tolist()) == sorted( - renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist() + renumber_map.renumber_map[ + 0 : len(expected_renumber_map) + ].values_host.tolist() ) renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna() @@ -902,8 +906,8 @@ def test_uniform_neighbor_sample_csr_csc_global(hops, seed): minors = sampling_results["minors"].dropna() assert len(majors) == len(minors) - majors = renumber_map.map.iloc[majors] - minors = renumber_map.map.iloc[minors] + majors = renumber_map.renumber_map.iloc[majors] + minors = renumber_map.renumber_map.iloc[minors] for i in range(len(majors)): assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])]) @@ -952,8 +956,8 @@ def test_uniform_neighbor_sample_csr_csc_local(hops, seed): majors = cudf.Series(cupy.arange(len(major_offsets) - 1)) majors = majors.repeat(cupy.diff(major_offsets)) - majors = renumber_map.map.iloc[majors] - minors = renumber_map.map.iloc[minors] + majors = renumber_map.renumber_map.iloc[majors] + minors = renumber_map.renumber_map.iloc[minors] for i in range(len(majors)): assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])]) diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 371410b8bd5..32413d3c88d 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -1015,7 +1015,7 @@ def test_uniform_neighbor_sample_renumber(dask_client, hops): assert (renumber_map.batch_id == 0).all() assert ( - renumber_map.map.nunique() + renumber_map.renumber_map.nunique() == cudf.concat( [sources_hop_0, sampling_results_renumbered.destinations] ).nunique() @@ -1091,7 +1091,9 @@ def test_uniform_neighbor_sample_offset_renumber(dask_client, hops): expected_renumber_map = cudf.concat([sources_hop_0, destinations_hop]).unique() assert sorted(expected_renumber_map.values_host.tolist()) == sorted( - renumber_map.map[0 : len(expected_renumber_map)].values_host.tolist() + renumber_map.renumber_map[ + 0 : len(expected_renumber_map) + ].values_host.tolist() ) renumber_map_offsets = offsets_renumbered.renumber_map_offsets.dropna() @@ -1153,8 +1155,8 @@ def test_uniform_neighbor_sample_csr_csc_global(dask_client, hops, seed): minors = sampling_results["minors"].dropna() assert len(majors) == len(minors) - majors = renumber_map.map.iloc[majors] - minors = renumber_map.map.iloc[minors] + majors = renumber_map.renumber_map.iloc[majors] + minors = renumber_map.renumber_map.iloc[minors] for i in range(len(majors)): assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])]) @@ -1221,8 +1223,8 @@ def test_uniform_neighbor_sample_csr_csc_local(dask_client, hops, seed): majors = cudf.Series(cupy.arange(len(major_offsets) - 1)) majors = majors.repeat(cupy.diff(major_offsets)) - majors = renumber_map.map.iloc[majors] - minors = renumber_map.map.iloc[minors] + majors = renumber_map.renumber_map.iloc[majors] + minors = renumber_map.renumber_map.iloc[minors] for i in range(len(majors)): assert 1 == len(el[(el.src == majors.iloc[i]) & (el.dst == minors.iloc[i])]) From 4d67ddd685f2ea404d53f36256598609e42db693 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 18:29:58 +0000 Subject: [PATCH 2/9] One more renumber map --- .../cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py index 7e67eab83c9..194df7d2f75 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -181,7 +181,9 @@ def _write_samples_to_parquet_csr( [ cudf.Series(minors_array[results_start:results_end], name="minors"), cudf.Series( - renumber_map.map.values[renumber_map_start:renumber_map_end], + renumber_map.renumber_map.values[ + renumber_map_start:renumber_map_end + ], name="map", ), label_hop_offsets_current_partition, @@ -299,7 +301,7 @@ def _write_samples_to_parquet_coo( else: renumber_map_end_ix = offsets_z.renumber_map_offsets.iloc[0] - renumber_map_p = renumber_map.map.iloc[ + renumber_map_p = renumber_map.renumber_map.iloc[ renumber_map_start_ix:renumber_map_end_ix ] From b04c7cf5b711ed56132dfadaa38c9acc4d1ca7e8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 18:33:23 +0000 Subject: [PATCH 3/9] Fix when reading from file --- python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py | 4 ++-- python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index 943681fb6ff..65bcce78771 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -222,7 +222,7 @@ def test_bulk_sampler_partitions(scratch_dir): ] recovered_samples = cudf.read_parquet(os.path.join(samples_path, file)) - recovered_map = recovered_samples.map + recovered_map = recovered_samples["map"] recovered_samples = recovered_samples.drop("map", axis=1).dropna() for current_batch_id in range(start_batch_id, end_batch_id + 1): diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py index 1f7c4277773..3fddb8f405b 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -166,7 +166,7 @@ def test_bulk_sampler_partitions(dask_client, scratch_dir, mg_input): ] recovered_samples = cudf.read_parquet(os.path.join(samples_path, file)) - recovered_map = recovered_samples.map + recovered_map = recovered_samples["map"] recovered_samples = recovered_samples.drop("map", axis=1).dropna() for current_batch_id in range(start_batch_id, end_batch_id + 1): From e4a5aa50fc0f26904680aad649fe96b92771e3b9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 19:03:04 +0000 Subject: [PATCH 4/9] Cast to float to ensure that it works when the frame is empty because the default dtype is object --- .../cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py b/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py index 3a8fa8451d5..50863fd894c 100644 --- a/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py +++ b/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -120,9 +120,9 @@ def sample_neighbors( return self._get_edgeid_type_d(sampled_df) else: return ( - sampled_df[src_n].values, - sampled_df[dst_n].values, - sampled_df["indices"].values, + sampled_df[src_n].astype("float").values, + sampled_df[dst_n].astype("float").values, + sampled_df["indices"].astype("float").values, ) def _get_edgeid_type_d(self, df): From 338a431ec1ed36b07f91789259e85f13231223ce Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 23:21:01 +0000 Subject: [PATCH 5/9] Make dgl and pyg wheel tests download cugraph/pylibcugraph wheels built during this job --- ci/test_wheel_cugraph-dgl.sh | 5 +++++ ci/test_wheel_cugraph-pyg.sh | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 90c86af95fe..9ecaa75a86b 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -11,6 +11,11 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g') mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +# Download wheels built during this job. +RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps +RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps +python -m pip install ./local-deps/*.whl + # use 'ls' to expand wildcard before adding `[extra]` requires for pip RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index acd42224387..6e44e1ad958 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -11,6 +11,11 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g') mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +# Download wheels built during this job. +RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps +RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps +python -m pip install ./local-deps/*.whl + # use 'ls' to expand wildcard before adding `[extra]` requires for pip RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names From f77bee583443c498495fa62035afb470958733be Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 23:21:29 +0000 Subject: [PATCH 6/9] Access map by name --- .../cugraph_dgl/dataloading/utils/sampling_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py index f674bece8be..10d851ebade 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/utils/sampling_helpers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -446,7 +446,7 @@ def _process_sampled_df_csc( major_offsets = cast_to_tensor(df.major_offsets.dropna()) label_hop_offsets = cast_to_tensor(df.label_hop_offsets.dropna()) renumber_map_offsets = cast_to_tensor(df.renumber_map_offsets.dropna()) - renumber_map = cast_to_tensor(df.map.dropna()) + renumber_map = cast_to_tensor(df["map"].dropna()) minors = cast_to_tensor(df.minors.dropna()) n_batches = len(renumber_map_offsets) - 1 From 8a0d38640d3332338b1948f05e568510611d4fff Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 23:21:59 +0000 Subject: [PATCH 7/9] Test a fix for the libcugraph_etl build issues from libcudf --- cpp/libcugraph_etl/src/renumbering.cu | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/libcugraph_etl/src/renumbering.cu b/cpp/libcugraph_etl/src/renumbering.cu index b0fdabe996a..08759702ab4 100644 --- a/cpp/libcugraph_etl/src/renumbering.cu +++ b/cpp/libcugraph_etl/src/renumbering.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -776,7 +776,7 @@ struct renumber_functor { for (int i = 0; i < src_view.num_columns(); i++) { auto str_col_view = cudf::strings_column_view(src_view.column(i)); src_vertex_chars_ptrs.push_back( - const_cast(str_col_view.chars().data())); + const_cast(str_col_view.parent().data())); src_vertex_offset_ptrs.push_back( const_cast(str_col_view.offsets().data())); } @@ -784,7 +784,7 @@ struct renumber_functor { for (int i = 0; i < dst_view.num_columns(); i++) { auto str_col_view = cudf::strings_column_view(dst_view.column(i)); dst_vertex_chars_ptrs.push_back( - const_cast(str_col_view.chars().data())); + const_cast(str_col_view.parent().data())); dst_vertex_offset_ptrs.push_back( const_cast(str_col_view.offsets().data())); } @@ -970,13 +970,14 @@ struct renumber_functor { std::move(unrenumber_col1_chars), rmm::device_buffer{}, 0); + auto str_col_1_contents = str_col_1->release(); renumber_table_vectors.push_back( cudf::make_strings_column(size_type(key_value_count), std::move(offset_col_1), - std::move(str_col_1), + std::move(*str_col_1_contents.data), 0, - rmm::device_buffer(size_type(0), exec_strm))); + std::move(*str_col_1_contents.null_mask))); auto offset_col_2 = std::make_unique(cudf::data_type(cudf::type_id::INT32), @@ -991,13 +992,14 @@ struct renumber_functor { std::move(unrenumber_col2_chars), rmm::device_buffer{}, 0); + auto str_col_2_contents = str_col_2->release(); renumber_table_vectors.push_back( cudf::make_strings_column(size_type(key_value_count), std::move(offset_col_2), - std::move(str_col_2), + std::move(*str_col_2_contents.data), 0, - rmm::device_buffer(size_type(0), exec_strm))); + std::move(*str_col_2_contents.null_mask))); // make table from string columns - did at the end From ce53b00623de6091ee67d23f67aede8db7102156 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 23:27:39 +0000 Subject: [PATCH 8/9] Fix export name --- cpp/libcugraph_etl/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/libcugraph_etl/CMakeLists.txt b/cpp/libcugraph_etl/CMakeLists.txt index 0d1f5d2c867..c1b526fb121 100644 --- a/cpp/libcugraph_etl/CMakeLists.txt +++ b/cpp/libcugraph_etl/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -190,7 +190,7 @@ rapids_export(INSTALL cugraph_etl ################################################################################ # - build export --------------------------------------------------------------- -rapids_export(BUILD cugraph +rapids_export(BUILD cugraph_etl EXPORT_SET cugraph_etl-exports GLOBAL_TARGETS cugraph cugraph_c cugraph_etl NAMESPACE cugraph:: From 37cd1c3334583d14c63b3419d80ae3854ca1900a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 5 Feb 2024 21:27:23 -0800 Subject: [PATCH 9/9] Fix last failure --- .../cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py b/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py index 50863fd894c..e4f6dd3745a 100644 --- a/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py +++ b/python/cugraph/cugraph/gnn/dgl_extensions/dgl_uniform_sampler.py @@ -134,7 +134,11 @@ def _get_edgeid_type_d(self, df): for etype, etype_id in self.etype_id_dict.items() } return { - etype: (df[src_n].values, df[dst_n].values, df["indices"].values) + etype: ( + df[src_n].astype("float").values, + df[dst_n].astype("float").values, + df["indices"].astype("float").values, + ) for etype, df in result_d.items() }