diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 9c83535b771..f5886540252 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index a559be18077..270bfa239ad 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda11.8-ucx1.15.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.10": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index ca10c04edee..e31428e4b0c 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 6e2bf45700a..835274999ba 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,24 +5,24 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": 
"rapidsai/devcontainers:24.10-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.12-cpp-cuda12.5-ucx1.17.0-openmpi-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.12-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/cuda:24.10": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { "version": "12.5", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/cuda", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bc489ffd3f0..b272fb43e35 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -38,7 +38,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -47,7 +47,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -77,13 +77,13 @@ jobs: date: ${{ inputs.date }} script: ci/build_wheel_pylibcugraph.sh extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.10 + extra-repo-sha: branch-24.12 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY node_type: cpu32 wheel-publish-pylibcugraph: needs: wheel-build-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -93,7 +93,7 @@ jobs: wheel-build-cugraph: needs: wheel-publish-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -101,12 +101,12 @@ jobs: date: ${{ inputs.date }} script: ci/build_wheel_cugraph.sh extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.10 + extra-repo-sha: branch-24.12 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY wheel-publish-cugraph: needs: wheel-build-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -116,7 +116,7 @@ jobs: wheel-build-nx-cugraph: needs: wheel-publish-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -126,7 +126,7 @@ jobs: wheel-publish-nx-cugraph: needs: wheel-build-nx-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -136,7 +136,7 @@ jobs: wheel-build-cugraph-dgl: needs: wheel-publish-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -146,7 +146,7 @@ jobs: wheel-publish-cugraph-dgl: needs: wheel-build-cugraph-dgl secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -156,7 +156,7 @@ jobs: wheel-build-cugraph-pyg: needs: wheel-publish-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -166,7 +166,7 @@ jobs: wheel-publish-cugraph-pyg: needs: wheel-build-cugraph-pyg secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -175,7 +175,7 @@ jobs: package-name: cugraph-pyg wheel-build-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -185,7 +185,7 @@ jobs: wheel-publish-cugraph-equivariant: needs: wheel-build-cugraph-equivariant secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.12 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} 
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index dacd9a93399..b0a1308237e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - changed-files - checks - conda-cpp-build - conda-cpp-tests @@ -34,29 +35,69 @@ jobs: - wheel-tests-cugraph-equivariant - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.12 + if: always() + with: + needs: ${{ toJSON(needs) }} + changed-files: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12 + with: + files_yaml: | + test_cpp: + - '**' + - '!.devcontainer/**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!mg_utils/**' + - '!notebooks/**' + - '!python/**' + - '!readme_pages/**' + # TODO: Remove this before merging + - '!.github/**' + test_notebooks: + - '**' + - '!.devcontainer/**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + # TODO: Remove this before merging + - '!.github/**' + test_python: + - '**' + - '!.devcontainer/**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + # TODO: Remove this before merging + - '!.github/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12 with: build_type: pull-request node_type: cpu32 conda-cpp-tests: - needs: conda-cpp-build + needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 with: build_type: pull-request enable_check_symbols: true @@ -64,19 +105,21 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12 with: build_type: pull-request conda-python-tests: - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request conda-notebook-tests: - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + if:
fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -86,7 +129,7 @@ jobs: docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -96,63 +139,67 @@ jobs: wheel-build-pylibcugraph: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_pylibcugraph.sh extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.10 + extra-repo-sha: branch-24.12 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY node_type: cpu32 wheel-tests-pylibcugraph: - needs: wheel-build-pylibcugraph + needs: [wheel-build-pylibcugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_pylibcugraph.sh wheel-build-cugraph: needs: wheel-tests-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_cugraph.sh extra-repo: rapidsai/cugraph-ops - extra-repo-sha: branch-24.10 + extra-repo-sha: branch-24.12 extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY wheel-tests-cugraph: - needs: wheel-build-cugraph + needs: [wheel-build-cugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cugraph.sh wheel-build-nx-cugraph: needs: wheel-tests-pylibcugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_nx-cugraph.sh wheel-tests-nx-cugraph: - needs: wheel-build-nx-cugraph + needs: [wheel-build-nx-cugraph, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_nx-cugraph.sh wheel-build-cugraph-dgl: needs: wheel-tests-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_cugraph-dgl.sh wheel-tests-cugraph-dgl: - needs: wheel-build-cugraph-dgl + needs: [wheel-build-cugraph-dgl, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cugraph-dgl.sh @@ -160,35 +207,37 @@ jobs: wheel-build-cugraph-pyg: needs: wheel-tests-cugraph secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_cugraph-pyg.sh wheel-tests-cugraph-pyg: - needs: wheel-build-cugraph-pyg + needs: [wheel-build-cugraph-pyg, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cugraph-pyg.sh matrix_filter: map(select(.ARCH == "amd64")) wheel-build-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12 with: build_type: pull-request script: ci/build_wheel_cugraph-equivariant.sh wheel-tests-cugraph-equivariant: - needs: wheel-build-cugraph-equivariant + needs: [wheel-build-cugraph-equivariant, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: build_type: pull-request script: ci/test_wheel_cugraph-equivariant.sh matrix_filter: map(select(.ARCH == "amd64")) devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12 with: arch: '["amd64"]' cuda: '["12.5"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 957d29ce72b..5fbdd276bd6 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: (cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel) conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibcugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: 
nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibcugraph.sh wheel-tests-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -60,7 +60,7 @@ jobs: script: ci/test_wheel_cugraph.sh wheel-tests-nx-cugraph: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: script: ci/test_wheel_nx-cugraph.sh wheel-tests-cugraph-dgl: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: matrix_filter: map(select(.ARCH == "amd64")) wheel-tests-cugraph-pyg: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: matrix_filter: map(select(.ARCH == "amd64")) wheel-tests-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 36c5fa84166..8ff284210b7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,11 +17,11 @@ repos: hooks: - id: black language_version: python3 - args: [--target-version=py39] + args: [--target-version=py310] files: ^(python/.*|benchmarks/.*)$ exclude: ^python/nx-cugraph/ - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.1.1 hooks: - id: flake8 args: ["--config=.flake8"] @@ -34,7 +34,7 @@ repos: hooks: - id: yesqa additional_dependencies: - - flake8==6.0.0 + - flake8==7.1.1 - repo: https://github.com/pre-commit/mirrors-clang-format rev: v16.0.6 hooks: @@ -42,7 +42,7 @@ repos: types_or: [c, c++, cuda] args: ["-fallback-style=none", "-style=file", "-i"] - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v0.2.0 + rev: v0.4.0 hooks: - id: verify-copyright files: | diff --git a/CHANGELOG.md b/CHANGELOG.md index f85c7d03f03..689a214751f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,88 @@ +# cugraph 24.08.00 (7 Aug 2024) + +## 🚨 Breaking Changes + +- Use MNMG version of ECG in python layer instead, and remove legacy ECG and Louvain ([#4514](https://github.com/rapidsai/cugraph/pull/4514)) [@naimnv](https://github.com/naimnv) + +## 🐛 Bug Fixes + +- add setuptools to host requirements for conda packages that need it ([#4582](https://github.com/rapidsai/cugraph/pull/4582)) [@jameslamb](https://github.com/jameslamb) +- Add pylibcugraph dependency on pylibraft. 
([#4570](https://github.com/rapidsai/cugraph/pull/4570)) [@bdice](https://github.com/bdice) +- Fix build error with NO_CUGRAPH_OPS ([#4563](https://github.com/rapidsai/cugraph/pull/4563)) [@seunghwak](https://github.com/seunghwak) +- [BUG] Fix Failing WholeGraph Tests ([#4560](https://github.com/rapidsai/cugraph/pull/4560)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Temporarily Disable Feature Store Tests with WholeGraph ([#4559](https://github.com/rapidsai/cugraph/pull/4559)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Fix MG `katz_centrality`: Check if DataFrame Arg is Not None ([#4555](https://github.com/rapidsai/cugraph/pull/4555)) [@nv-rliu](https://github.com/nv-rliu) +- nx-cugraph: fix `from_pandas_edgekey` given edgekey but not edgeattr ([#4550](https://github.com/rapidsai/cugraph/pull/4550)) [@eriknw](https://github.com/eriknw) +- Fix triangle count test bug ([#4549](https://github.com/rapidsai/cugraph/pull/4549)) [@jnke2016](https://github.com/jnke2016) +- [BUG] Use the Correct WG Communicator ([#4548](https://github.com/rapidsai/cugraph/pull/4548)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Add Additional Check For SSSP Source Vertex & Fix SSSP Benchmark ([#4541](https://github.com/rapidsai/cugraph/pull/4541)) [@nv-rliu](https://github.com/nv-rliu) +- Fix OOM Bug for Jaccard, Sorensen, and Overlap benchmarks ([#4524](https://github.com/rapidsai/cugraph/pull/4524)) [@nv-rliu](https://github.com/nv-rliu) +- Distribute start_list across ranks ([#4519](https://github.com/rapidsai/cugraph/pull/4519)) [@jnke2016](https://github.com/jnke2016) +- [FIX] Skip Distributed Sampler Tests if PyTorch with CUDA is not Available ([#4518](https://github.com/rapidsai/cugraph/pull/4518)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- [BUG] Fix a hang issue in MG triangle counts (when invoked with a small number of vertices to update triangle counts) ([#4517](https://github.com/rapidsai/cugraph/pull/4517)) [@seunghwak](https://github.com/seunghwak) +- Update MG Benchmark List ([#4516](https://github.com/rapidsai/cugraph/pull/4516)) [@nv-rliu](https://github.com/nv-rliu) +- Fix TensorProductConv test and improve docs ([#4480](https://github.com/rapidsai/cugraph/pull/4480)) [@tingyu66](https://github.com/tingyu66) +- Test nx-cugraph package instead of editable install ([#4442](https://github.com/rapidsai/cugraph/pull/4442)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) + +## 📖 Documentation + +- DOC: typo in nx_transition.rst ([#4491](https://github.com/rapidsai/cugraph/pull/4491)) [@raybellwaves](https://github.com/raybellwaves) +- Doc cleanup for nx-cugraph: fixed typos, cleaned up various descriptions, renamed notebook to match naming convetion. 
([#4478](https://github.com/rapidsai/cugraph/pull/4478)) [@rlratzel](https://github.com/rlratzel) +- [DOC] Minor Improvements to cuGraph-PyG Documentation ([#4460](https://github.com/rapidsai/cugraph/pull/4460)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) + +## 🚀 New Features + +- Use MNMG version of ECG in python layer instead, and remove legacy ECG and Louvain ([#4514](https://github.com/rapidsai/cugraph/pull/4514)) [@naimnv](https://github.com/naimnv) +- c_api and plc binding for lookup src dst using edge ids and type(s) ([#4494](https://github.com/rapidsai/cugraph/pull/4494)) [@naimnv](https://github.com/naimnv) +- Forward merge branch-24.06 into branch-24.08 ([#4489](https://github.com/rapidsai/cugraph/pull/4489)) [@nv-rliu](https://github.com/nv-rliu) +- [FEA] New Graph Interface and Loaders for Distributed Sampling in DGL ([#4486](https://github.com/rapidsai/cugraph/pull/4486)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- compute cosine similarity for vertex pairs ([#4482](https://github.com/rapidsai/cugraph/pull/4482)) [@naimnv](https://github.com/naimnv) +- Define heterogeneous renumbering API ([#4463](https://github.com/rapidsai/cugraph/pull/4463)) [@seunghwak](https://github.com/seunghwak) +- Lookup edge src dst using edge id and type ([#4449](https://github.com/rapidsai/cugraph/pull/4449)) [@naimnv](https://github.com/naimnv) +- Biased sampling ([#4443](https://github.com/rapidsai/cugraph/pull/4443)) [@seunghwak](https://github.com/seunghwak) + +## 🛠️ Improvements + +- nx-cugraph: check networkx version ([#4571](https://github.com/rapidsai/cugraph/pull/4571)) [@eriknw](https://github.com/eriknw) +- nx-cugraph: add `G.__networkx_cache__` to enable graph conversion caching ([#4567](https://github.com/rapidsai/cugraph/pull/4567)) [@eriknw](https://github.com/eriknw) +- split up CUDA-suffixed dependencies in dependencies.yaml ([#4552](https://github.com/rapidsai/cugraph/pull/4552)) [@jameslamb](https://github.com/jameslamb) +- Use workflow branch 24.08 again ([#4544](https://github.com/rapidsai/cugraph/pull/4544)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Support non p2p configuration when initializing the comms ([#4543](https://github.com/rapidsai/cugraph/pull/4543)) [@jnke2016](https://github.com/jnke2016) +- Fix Warning from `simpleDistributedGraph.py` ([#4540](https://github.com/rapidsai/cugraph/pull/4540)) [@nv-rliu](https://github.com/nv-rliu) +- Create a graph from the edge list in multiple chunks ([#4539](https://github.com/rapidsai/cugraph/pull/4539)) [@seunghwak](https://github.com/seunghwak) +- nx-cugraph: add dijkstra sssp functions ([#4538](https://github.com/rapidsai/cugraph/pull/4538)) [@eriknw](https://github.com/eriknw) +- nx-cugraph: add `from_dict_of_lists` and `to_dict_of_lists` ([#4537](https://github.com/rapidsai/cugraph/pull/4537)) [@eriknw](https://github.com/eriknw) +- Ensure `get_test_data.sh` doesn't re-download datasets ([#4536](https://github.com/rapidsai/cugraph/pull/4536)) [@trxcllnt](https://github.com/trxcllnt) +- Define and Implement C API for biased sampling ([#4535](https://github.com/rapidsai/cugraph/pull/4535)) [@ChuckHastings](https://github.com/ChuckHastings) +- Build and test with CUDA 12.5.1 ([#4534](https://github.com/rapidsai/cugraph/pull/4534)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Refactor C++ unit tests to allow finer grained filtering ([#4533](https://github.com/rapidsai/cugraph/pull/4533)) [@ChuckHastings](https://github.com/ChuckHastings) +- [IMP] Set the Default WG Memory Type to 
'distributed' for the MNMG PyG Example ([#4532](https://github.com/rapidsai/cugraph/pull/4532)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- nx-cugraph: add `relabel_nodes` and `convert_node_labels_to_integers` ([#4531](https://github.com/rapidsai/cugraph/pull/4531)) [@eriknw](https://github.com/eriknw) +- Add `-cuXX` suffixed versions of cugraph-service-client dependency to pyproject.toml's project.dependencies list ([#4530](https://github.com/rapidsai/cugraph/pull/4530)) [@trxcllnt](https://github.com/trxcllnt) +- Further optimize `from_pandas_edgelist` with cudf ([#4528](https://github.com/rapidsai/cugraph/pull/4528)) [@eriknw](https://github.com/eriknw) +- Performance optimize BFS (including direction optimizing BFS implementation, mainly for single-GPU) ([#4527](https://github.com/rapidsai/cugraph/pull/4527)) [@seunghwak](https://github.com/seunghwak) +- Add CUDA_STATIC_MATH_LIBRARIES ([#4526](https://github.com/rapidsai/cugraph/pull/4526)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Better handle cudf.pandas in `from_pandas_edgelist` ([#4525](https://github.com/rapidsai/cugraph/pull/4525)) [@eriknw](https://github.com/eriknw) +- Skip the benchmark ctests within CI ([#4522](https://github.com/rapidsai/cugraph/pull/4522)) [@ChuckHastings](https://github.com/ChuckHastings) +- remove thriftpy2 ceiling ([#4521](https://github.com/rapidsai/cugraph/pull/4521)) [@jameslamb](https://github.com/jameslamb) +- Avoid --find-links in wheel jobs ([#4509](https://github.com/rapidsai/cugraph/pull/4509)) [@jameslamb](https://github.com/jameslamb) +- Refactor code base to reduce memory requirement for building libcugraph ([#4506](https://github.com/rapidsai/cugraph/pull/4506)) [@naimnv](https://github.com/naimnv) +- Tweak rmm configuration for C++ unit tests ([#4503](https://github.com/rapidsai/cugraph/pull/4503)) [@ChuckHastings](https://github.com/ChuckHastings) +- Expose new all-pairs Similarity algorithms ([#4502](https://github.com/rapidsai/cugraph/pull/4502)) [@jnke2016](https://github.com/jnke2016) +- remove openmpi ceiling ([#4496](https://github.com/rapidsai/cugraph/pull/4496)) [@jameslamb](https://github.com/jameslamb) +- Cut peak memory footprint in per_v_transform_reduce_dst_key_aggregated_outgoing_e ([#4484](https://github.com/rapidsai/cugraph/pull/4484)) [@seunghwak](https://github.com/seunghwak) +- Skip MG `dgl_uniform_sampler` test in nightlies ([#4479](https://github.com/rapidsai/cugraph/pull/4479)) [@nv-rliu](https://github.com/nv-rliu) +- Remove text builds of documentation ([#4468](https://github.com/rapidsai/cugraph/pull/4468)) [@vyasr](https://github.com/vyasr) +- [IMP] Limit the Test Data Size when Running CI in `gcn_dist_sg.py` ([#4461](https://github.com/rapidsai/cugraph/pull/4461)) [@alexbarghi-nv](https://github.com/alexbarghi-nv) +- Forward Merge branch-24.06 into branch-24.08 ([#4454](https://github.com/rapidsai/cugraph/pull/4454)) [@nv-rliu](https://github.com/nv-rliu) +- Properly clean up python directories ([#4453](https://github.com/rapidsai/cugraph/pull/4453)) [@ChuckHastings](https://github.com/ChuckHastings) +- Fixes for On-Going MG Test Failures ([#4450](https://github.com/rapidsai/cugraph/pull/4450)) [@nv-rliu](https://github.com/nv-rliu) +- remove unnecessary 'setuptools' and 'wheel' dependencies ([#4448](https://github.com/rapidsai/cugraph/pull/4448)) [@jameslamb](https://github.com/jameslamb) +- MG Implementation K-Truss ([#4438](https://github.com/rapidsai/cugraph/pull/4438)) [@jnke2016](https://github.com/jnke2016) +- Overhaul 
ops-codeowners ([#4409](https://github.com/rapidsai/cugraph/pull/4409)) [@raydouglass](https://github.com/raydouglass) +- Use rapids-build-backend ([#4393](https://github.com/rapidsai/cugraph/pull/4393)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Optimize K-Truss ([#4375](https://github.com/rapidsai/cugraph/pull/4375)) [@jnke2016](https://github.com/jnke2016) + # cugraph 24.06.00 (5 Jun 2024) ## 🚨 Breaking Changes diff --git a/VERSION b/VERSION index 7c7ba04436f..af28c42b528 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.10.00 +24.12.00 diff --git a/benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb b/benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb index 95b456c7812..4681c8ec825 100644 --- a/benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb +++ b/benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb @@ -18,7 +18,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } diff --git a/benchmarks/cugraph-dgl/notebooks/heterogeneous_dataloader_benchmark.ipynb b/benchmarks/cugraph-dgl/notebooks/heterogeneous_dataloader_benchmark.ipynb index d3b054bb0ee..2c4a934827a 100644 --- a/benchmarks/cugraph-dgl/notebooks/heterogeneous_dataloader_benchmark.ipynb +++ b/benchmarks/cugraph-dgl/notebooks/heterogeneous_dataloader_benchmark.ipynb @@ -176,7 +176,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n", + "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n", " dgl_warning(f'Dataloader CPU affinity opt is not enabled, consider switching it on '\n" ] }, diff --git a/benchmarks/cugraph-dgl/notebooks/homogenous_dataloader_benchmark.ipynb b/benchmarks/cugraph-dgl/notebooks/homogenous_dataloader_benchmark.ipynb index ea1e9b34965..ecd111dabdf 100644 --- a/benchmarks/cugraph-dgl/notebooks/homogenous_dataloader_benchmark.ipynb +++ b/benchmarks/cugraph-dgl/notebooks/homogenous_dataloader_benchmark.ipynb @@ -26,7 +26,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } @@ -190,7 +190,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n", + "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n", " dgl_warning(f'Dataloader CPU affinity opt is not enabled, consider switching it on '\n" ] }, @@ -278,7 +278,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/distributed/worker.py:2988: UserWarning: Large object of size 1.42 MiB detected in task graph: \n", + "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/distributed/worker.py:2988: UserWarning: Large object of size 1.42 MiB detected in task graph: \n", " [b'\\xad\\xd1\\xe3\\x9c\\x96\\x83O\\xb3\\xba1\\x86\\x94\\xb6\\ ... =int32), False]\n", "Consider scattering large objects ahead of time\n", "with client.scatter to reduce scheduler burden and \n", diff --git a/benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py b/benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py index 539fe333b1e..55ff0043e30 100644 --- a/benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py +++ b/benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -126,4 +126,4 @@ def sampling_func(g, seed_nodes, labels, train_loader): st = time.time() sampling_func(g, subset_split_idx["train"], labels, train_loader) et = time.time() - print(f"Sampling time taken = {et-st} s") + print(f"Sampling time taken = {et - st} s") diff --git a/benchmarks/cugraph/notebooks/feature_storage.ipynb b/benchmarks/cugraph/notebooks/feature_storage.ipynb index 7413ac00cde..440d76fbdb4 100644 --- a/benchmarks/cugraph/notebooks/feature_storage.ipynb +++ b/benchmarks/cugraph/notebooks/feature_storage.ipynb @@ -18,7 +18,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } diff --git a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py index 8c46095a7da..083acdde2f4 100644 --- a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py +++ b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py @@ -266,7 +266,7 @@ def uns_func(*args, **kwargs): @pytest.mark.managedmem_off @pytest.mark.poolallocator_on @pytest.mark.parametrize("batch_size", params.batch_sizes.values()) -@pytest.mark.parametrize("fanout", [params.fanout_10_25, params.fanout_5_10_15]) +@pytest.mark.parametrize("fanout", [params.fanout_10_25]) @pytest.mark.parametrize( "with_replacement", [False], ids=lambda v: f"with_replacement={v}" ) @@ -287,6 +287,8 @@ def bench_cugraph_uniform_neighbor_sample( start_list=uns_args["start_list"], fanout_vals=uns_args["fanout"], with_replacement=uns_args["with_replacement"], + use_legacy_names=False, + with_edge_properties=True, ) """ dtmap = {"int32": 32 // 8, "int64": 64 // 8} diff --git a/benchmarks/nx-cugraph/pytest-based/README.md b/benchmarks/nx-cugraph/pytest-based/README.md new file mode 100644 index 00000000000..781550fa560 --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/README.md @@ -0,0 +1,54 @@ +## `nx-cugraph` Benchmarks + +### Overview + +This directory contains a set of scripts designed to benchmark NetworkX with the `nx-cugraph` backend and deliver a report that summarizes the speed-up and runtime deltas over default NetworkX. + +Our current benchmarks provide the following datasets: + +| Dataset | Nodes | Edges | Directed | | -------- | ------- | ------- | ------- | | netscience | 1,461 | 5,484 | Yes | | email-Eu-core | 1,005 | 25,571 | Yes | | cit-Patents | 3,774,768 | 16,518,948 | Yes | | hollywood | 1,139,905 | 57,515,616 | No | | soc-LiveJournal1 | 4,847,571 | 68,993,773 | Yes | + + + +### Scripts + +#### 1. `run-main-benchmarks.sh` +This script allows users to run a small set of commonly-used algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark. + +NOTE: If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running. + +**Usage:** + - Run with `--cpu-only`: + ```bash + ./run-main-benchmarks.sh --cpu-only + ``` + - Run with `--gpu-only`: + ```bash + ./run-main-benchmarks.sh --gpu-only + ``` + - Run without any arguments (all backends): + ```bash + ./run-main-benchmarks.sh + ``` + +#### 2. `get_graph_bench_dataset.py` +This script downloads the specified dataset using `cugraph.datasets`. + +**Usage:** + ```bash + python get_graph_bench_dataset.py [dataset] + ``` + +#### 3. `create_results_summary_page.py` +This script is designed to be run after `run-main-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run.
+ +**Usage:** + ```bash + python create_results_summary_page.py > report.html + ``` diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index d40b5130827..f88d93c3f17 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -271,9 +271,8 @@ def bench_from_networkx(benchmark, graph_obj): # normalized_param_values = [True, False] -# k_param_values = [10, 100] normalized_param_values = [True] -k_param_values = [10] +k_param_values = [10, 100, 1000] @pytest.mark.parametrize( @@ -282,6 +281,10 @@ def bench_from_networkx(benchmark, graph_obj): @pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + if k > G.number_of_nodes(): + pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}") + result = benchmark.pedantic( target=backend_wrapper(nx.betweenness_centrality), args=(G,), @@ -305,6 +308,10 @@ def bench_edge_betweenness_centrality( benchmark, graph_obj, backend_wrapper, normalized, k ): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + if k > G.number_of_nodes(): + pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}") + result = benchmark.pedantic( target=backend_wrapper(nx.edge_betweenness_centrality), args=(G,), @@ -473,6 +480,26 @@ def bench_pagerank_personalized(benchmark, graph_obj, backend_wrapper): assert type(result) is dict +def bench_shortest_path(benchmark, graph_obj, backend_wrapper): + """ + This passes in the source node with the highest degree, but no target. + """ + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + + result = benchmark.pedantic( + target=backend_wrapper(nx.shortest_path), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) node = get_highest_degree_node(graph_obj) diff --git a/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py b/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py new file mode 100644 index 00000000000..f1cc4b06ccc --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py @@ -0,0 +1,291 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import re +import pathlib +import json +import platform +import psutil +import socket +import subprocess + + +def get_formatted_time_value(time): + res = "" + if time < 1: + if time < 0.001: + units = "us" + time *= 1e6 + else: + units = "ms" + time *= 1e3 + else: + units = "s" + return f"{time:.3f}{units}" + + +def get_all_benchmark_info(): + benchmarks = {} + # Populate benchmarks dir from .json files + for json_file in logs_dir.glob("*.json"): + try: + data = json.loads(open(json_file).read()) + except json.decoder.JSONDecodeError: + continue + + for benchmark_run in data["benchmarks"]: + # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]" + name = benchmark_run["name"] + + algo_name = name.split("[")[0] + if algo_name.startswith("bench_"): + algo_name = algo_name[6:] + # special case for betweenness_centrality + match = k_patt.match(name) + if match is not None: + algo_name += f", k={match.group(1)}" + + match = dataset_patt.match(name) + if match is None: + raise RuntimeError( + f"benchmark name {name} in file {json_file} has an unexpected format" + ) + dataset = match.group(1) + if dataset.endswith("-backend"): + dataset = dataset[:-8] + + match = backend_patt.match(name) + if match is None: + raise RuntimeError( + f"benchmark name {name} in file {json_file} has an unexpected format" + ) + backend = match.group(1) + if backend == "None": + backend = "networkx" + + runtime = benchmark_run["stats"]["mean"] + benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[ + dataset + ] = runtime + return benchmarks + + +def compute_perf_vals(cugraph_runtime, networkx_runtime): + speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X" + delta = networkx_runtime - cugraph_runtime + if abs(delta) < 1: + if abs(delta) < 0.001: + units = "us" + delta *= 1e6 + else: + units = "ms" + delta *= 1e3 + else: + units = "s" + delta_string = f"{delta:.3f}{units}" + + return (speedup_string, delta_string) + + +def get_mem_info(): + return round(psutil.virtual_memory().total / (1024**3), 2) + + +def get_cuda_version(): + output = subprocess.check_output("nvidia-smi", shell=True).decode() + try: + return next( + line.split("CUDA Version: ")[1].split()[0] + for line in output.splitlines() + if "CUDA Version" in line + ) + except subprocess.CalledProcessError: + return "Failed to get CUDA version." + + +def get_first_gpu_info(): + try: + gpu_info = ( + subprocess.check_output( + "nvidia-smi --query-gpu=name,memory.total,memory.free,memory.used --format=csv,noheader", + shell=True, + ) + .decode() + .strip() + ) + if gpu_info: + gpus = gpu_info.split("\n") + num_gpus = len(gpus) + first_gpu = gpus[0] # Get the information for the first GPU + gpu_name, mem_total, _, _ = first_gpu.split(",") + return f"{num_gpus} x {gpu_name.strip()} ({round(int(mem_total.strip().split()[0]) / (1024), 2)} GB)" + else: + print("No GPU found or unable to query GPU details.") + except subprocess.CalledProcessError: + print("Failed to execute nvidia-smi. No GPU information available.") + + +def get_system_info(): + print('
<div>') + print(f"<p><strong>Hostname:</strong> {socket.gethostname()}</p>") + print( + f'<p><strong>Operating System:</strong> {platform.system()} {platform.release()}</p>' + ) + print(f'<p><strong>Kernel Version :</strong> {platform.version()}</p>') + with open("/proc/cpuinfo") as f: + print( + f'<p><strong>CPU:</strong> {next(line.strip().split(": ")[1] for line in f if "model name" in line)} ({psutil.cpu_count(logical=False)} cores)</p>' + ) + print(f'<p><strong>Memory:</strong> {get_mem_info()} GB</p>') + print(f"<p><strong>GPU:</strong> {get_first_gpu_info()}</p>") + print(f"<p><strong>CUDA Version:</strong> {get_cuda_version()}</p>") + + +if __name__ == "__main__": + logs_dir = pathlib.Path("logs") + + dataset_patt = re.compile(".*ds=([\w-]+).*") + backend_patt = re.compile(".*backend=(\w+).*") + k_patt = re.compile(".*k=(10*).*") + + # Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset + benchmarks = get_all_benchmark_info() + + # dump HTML table + ordered_datasets = [ + "netscience", + "email_Eu_core", + "cit-patents", + "hollywood", + "soc-livejournal1", + ] + # dataset, # Node, # Edge, Directed info + dataset_meta = { + "netscience": ["1,461", "5,484", "Yes"], + "email_Eu_core": ["1,005", "25,571", "Yes"], + "cit-patents": ["3,774,768", "16,518,948", "Yes"], + "hollywood": ["1,139,905", "57,515,616", "No"], + "soc-livejournal1": ["4,847,571", "68,993,773", "Yes"], + } + + print( + """ + <html> + <body> + <table> + <tr> + <td>Dataset<br>Nodes<br>Edges<br>Directed</td> + """ + ) + for ds in ordered_datasets: + print( + f" <td>{ds}<br>{dataset_meta[ds][0]}<br>{dataset_meta[ds][1]}<br>{dataset_meta[ds][2]}</td>" + ) + print( + """ + </tr> + """ + ) + for algo_name in sorted(benchmarks): + algo_runs = benchmarks[algo_name] + print(" <tr>") + print(f" <td>{algo_name}</td>") + # Proceed only if any results are present for both cugraph and NX + if "cugraph" in algo_runs and "networkx" in algo_runs: + cugraph_algo_runs = algo_runs["cugraph"] + networkx_algo_runs = algo_runs["networkx"] + datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs) + + # populate the table with speedup results for each dataset in the order + # specified in ordered_datasets. If results for a run using a dataset + # are not present for both cugraph and NX, output an empty cell. + for dataset in ordered_datasets: + if dataset in datasets_in_both: + cugraph_runtime = cugraph_algo_runs[dataset] + networkx_runtime = networkx_algo_runs[dataset] + (speedup, runtime_delta) = compute_perf_vals( + cugraph_runtime=cugraph_runtime, + networkx_runtime=networkx_runtime, + ) + nx_formatted = get_formatted_time_value(networkx_runtime) + cg_formatted = get_formatted_time_value(cugraph_runtime) + print( + f" <td>{nx_formatted} / {cg_formatted}<br>{speedup}<br>{runtime_delta}</td>" + ) + else: + print(f" <td></td>") + + # If a comparison between cugraph and NX cannot be made, output empty cells + # for each dataset + else: + for _ in range(len(ordered_datasets)): + print(" <td></td>") + print(" </tr>") + print( + """ + </table>\n</body>\n</html>
\n""") diff --git a/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py b/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py new file mode 100644 index 00000000000..5a0a15da8ee --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py @@ -0,0 +1,35 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Checks if a particular dataset has been downloaded inside the datasets dir +(RAPIDS_DATAEST_ROOT_DIR). If not, the file will be downloaded using the +datasets API. + +Positional Arguments: + 1) dataset name (e.g. 'email_Eu_core', 'cit-patents') + available datasets can be found here: `python/cugraph/cugraph/datasets/__init__.py` +""" + +import sys + +import cugraph.datasets as cgds + + +if __name__ == "__main__": + # download and store dataset (csv) by using the Datasets API + dataset = sys.argv[1].replace("-", "_") + dataset_obj = getattr(cgds, dataset) + + if not dataset_obj.get_path().exists(): + dataset_obj.get_edgelist(download=True) diff --git a/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh b/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh new file mode 100755 index 00000000000..3059e3d4bdf --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +# location to store datasets used for benchmarking +export RAPIDS_DATASET_ROOT_DIR=/datasets/cugraph +mkdir -p logs + +# list of algos, datasets, and back-ends to use in combinations +algos=" + pagerank + betweenness_centrality + louvain + shortest_path + weakly_connected_components + triangles + bfs_predecessors +" +datasets=" + netscience + email_Eu_core + cit-patents + hollywood + soc-livejournal +" +# None backend is default networkx +# cugraph-preconverted backend is nx-cugraph +backends=" + None + cugraph-preconverted +" +# check for --cpu-only or --gpu-only args +if [[ "$#" -eq 1 ]]; then + case $1 in + --cpu-only) + backends="None" + ;; + --gpu-only) + backends="cugraph-preconverted" + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +fi + +for algo in $algos; do + for dataset in $datasets; do + # this script can be used to download benchmarking datasets by name via cugraph.datasets + python get_graph_bench_dataset.py $dataset + for backend in $backends; do + name="${backend}__${algo}__${dataset}" + echo "Running: $backend, $dataset, bench_$algo" + # command to reproduce test + # echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py" + pytest -sv \ + -k "$backend and $dataset and bench_$algo and not 1000" \ + --benchmark-json="logs/${name}.json" \ + bench_algos.py 2>&1 | tee "logs/${name}.out" + done + done +done diff --git a/benchmarks/pytest.ini b/benchmarks/pytest.ini index fe7fc31b6d6..d692b78de37 100644 --- a/benchmarks/pytest.ini +++ b/benchmarks/pytest.ini @@ -8,6 +8,7 @@ testpaths = addopts = --benchmark-columns="min, max, mean, stddev, outliers" + --tb=native markers = managedmem_on: RMM managed memory enabled diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 707c61e4d3e..f3979ab3049 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -17,7 +17,7 @@ cd "${package_dir}" python -m pip wheel \ -w dist \ - -vvv \ + -v \ --no-deps \ --disable-pip-version-check \ --extra-index-url https://pypi.nvidia.com \ @@ -30,7 +30,23 @@ if [[ ${package_name} == "nx-cugraph" ]] || \ [[ ${package_name} == "cugraph-equivariant" ]]; then RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist else + case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXCLUDE_ARGS=( + --exclude "libcublas.so.12" + --exclude "libcublasLt.so.12" + --exclude "libcurand.so.10" + --exclude "libcusolver.so.11" + --exclude "libcusparse.so.12" + --exclude "libnvJitLink.so.12" + ) + ;; + 11.*) + EXCLUDE_ARGS=() + ;; + esac + mkdir -p final_dist - python -m auditwheel repair -w final_dist dist/* + python -m auditwheel repair -w final_dist "${EXCLUDE_ARGS[@]}" dist/* RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist fi diff --git a/ci/build_wheel_cugraph.sh b/ci/build_wheel_cugraph.sh index 6f1b23923ff..20d9bf47e3e 100755 --- a/ci/build_wheel_cugraph.sh +++ b/ci/build_wheel_cugraph.sh @@ -19,8 +19,16 @@ export PIP_CONSTRAINT="${PWD}/constraints.txt" PARALLEL_LEVEL=$(python -c \ "from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))") +case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" + ;; + 11.*) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" + ;; +esac -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export
SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/${EXTRA_CMAKE_ARGS}" export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}" ./ci/build_wheel.sh cugraph python/cugraph diff --git a/ci/build_wheel_pylibcugraph.sh b/ci/build_wheel_pylibcugraph.sh index ee33ab4a82d..fa967b0be29 100755 --- a/ci/build_wheel_pylibcugraph.sh +++ b/ci/build_wheel_pylibcugraph.sh @@ -6,7 +6,16 @@ set -euo pipefail PARALLEL_LEVEL=$(python -c \ "from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))") -export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +case "${RAPIDS_CUDA_VERSION}" in + 12.*) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" + ;; + 11.*) + EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=OFF" + ;; +esac + +export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/${EXTRA_CMAKE_ARGS}" export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}" ./ci/build_wheel.sh pylibcugraph python/pylibcugraph diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 08c22fca02e..5859ebde953 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -30,7 +30,7 @@ CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} -NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})" +NEXT_UCXX_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})" echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" @@ -45,7 +45,8 @@ function sed_runner() { echo "${NEXT_FULL_TAG}" > VERSION # Need to distutils-normalize the original version -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") +NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))") DEPENDENCIES=( cudf @@ -71,23 +72,30 @@ DEPENDENCIES=( rmm rapids-dask-dependency ) +UCXX_DEPENDENCIES=( + ucx-py +) for FILE in dependencies.yaml conda/environments/*.yaml python/cugraph-{pyg,dgl}/conda/*.yaml; do for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done - sed_runner "/-.* ucx-py\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCX_PY_VERSION}.*,>=0.0.0a0/g" "${FILE}" + for DEP in "${UCXX_DEPENDENCIES[@]}"; do + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" + done done for FILE in python/**/pyproject.toml python/**/**/pyproject.toml; do for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/\"${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}" done - sed_runner "/\"ucx-py\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*\"/==${NEXT_UCX_PY_VERSION}.*,>=0.0.0a0\"/g" "${FILE}" + for DEP in "${UCXX_DEPENDENCIES[@]}"; do + sed_runner "/\"${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*\"/==${NEXT_UCXX_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}" + done done # ucx-py version 
-sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}.*\"/}" conda/recipes/cugraph/conda_build_config.yaml -sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}.*\"/}" conda/recipes/cugraph-service/conda_build_config.yaml -sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCX_PY_VERSION}.*\"/}" conda/recipes/pylibcugraph/conda_build_config.yaml +for FILE in conda/recipes/*/conda_build_config.yaml; do + sed_runner "/^ucx_py_version:$/ {n;s/.*/ - \"${NEXT_UCXX_SHORT_TAG_PEP440}.*\"/}" "${FILE}" +done # CI files for FILE in .github/workflows/*.yaml; do diff --git a/ci/run_nx_cugraph_pytests.sh b/ci/run_nx_cugraph_pytests.sh index b0caffd0a0f..0e309d1e2d4 100755 --- a/ci/run_nx_cugraph_pytests.sh +++ b/ci/run_nx_cugraph_pytests.sh @@ -6,4 +6,5 @@ set -euo pipefail # Support invoking run_nx_cugraph_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/nx-cugraph/nx_cugraph -pytest --capture=no --cache-clear --benchmark-disable "$@" tests +NX_CUGRAPH_USE_COMPAT_GRAPHS=False pytest --capture=no --cache-clear --benchmark-disable "$@" tests +NX_CUGRAPH_USE_COMPAT_GRAPHS=True pytest --capture=no --cache-clear --benchmark-disable "$@" tests diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index ba106d34a46..6c14870164e 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -38,7 +38,7 @@ nvidia-smi # RAPIDS_DATASET_ROOT_DIR is used by test scripts export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" pushd "${RAPIDS_DATASET_ROOT_DIR}" -./get_test_data.sh --subset +./get_test_data.sh --cpp_ci_subset popd export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/ diff --git a/ci/test_python.sh b/ci/test_python.sh index e8c8272e8d6..f21a06cf061 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -108,7 +108,7 @@ echo "nx-cugraph coverage from networkx tests: $_coverage" echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }' # Ensure all algorithms were called by comparing covered lines to function lines. 
# Run our tests again (they're fast enough) to add their coverage, then create coverage.json -pytest \ +NX_CUGRAPH_USE_COMPAT_GRAPHS=False pytest \ --pyargs nx_cugraph \ --config-file=../pyproject.toml \ --cov-config=../pyproject.toml \ @@ -159,7 +159,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then cugraph \ cugraph-dgl \ 'dgl>=1.1.0.cu*,<=2.0.0.cu*' \ - 'pytorch>=2.0' \ + 'pytorch>=2.3,<2.4' \ 'cuda-version=11.8' rapids-print-env @@ -198,10 +198,10 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then # TODO re-enable logic once CUDA 12 is testable #if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then CONDA_CUDA_VERSION="11.8" - PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html" + PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu118.html" #else # CONDA_CUDA_VERSION="12.1" - # PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html" + # PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu121.html" #fi # Will automatically install built dependencies of cuGraph-PyG diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 158704e08d1..e3690dfde6e 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -37,6 +37,7 @@ else DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL="1000s" \ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \ + NX_CUGRAPH_USE_COMPAT_GRAPHS=False \ python -m pytest \ -v \ --import-mode=append \ diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 564b46cb07e..9b79cb17fe4 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -32,18 +32,8 @@ fi PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}" DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html" -# Starting from 2.2, PyTorch wheels depend on nvidia-nccl-cuxx>=2.19 wheel and -# dynamically link to NCCL. RAPIDS CUDA 11 CI images have an older NCCL version that -# might shadow the newer NCCL required by PyTorch during import (when importing -# `cupy` before `torch`). 
-if [[ "${NCCL_VERSION}" < "2.19" ]]; then - PYTORCH_VER="2.1.0" -else - PYTORCH_VER="2.3.0" -fi - rapids-logger "Installing PyTorch and DGL" -rapids-retry python -m pip install "torch==${PYTORCH_VER}" --index-url ${PYTORCH_URL} +rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL} rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL} python -m pytest python/cugraph-dgl/tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index c55ae033344..8f4b16a2dec 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -29,13 +29,13 @@ export CI_RUN=1 if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then PYTORCH_URL="https://download.pytorch.org/whl/cu118" - PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu118.html" + PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu118.html" else PYTORCH_URL="https://download.pytorch.org/whl/cu121" - PYG_URL="https://data.pyg.org/whl/torch-2.1.0+cu121.html" + PYG_URL="https://data.pyg.org/whl/torch-2.3.0+cu121.html" fi rapids-logger "Installing PyTorch and PyG dependencies" -rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL} +rapids-retry python -m pip install torch==2.3.0 --index-url ${PYTORCH_URL} rapids-retry python -m pip install "torch-geometric>=2.5,<2.6" rapids-retry python -m pip install \ ogb \ diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index f0b86c791f8..a23c2395646 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -16,55 +16,55 @@ dependencies: - cuda-nvtx - cuda-version=11.8 - cudatoolkit -- cudf==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 - doxygen - fsspec>=0.6.0 - gcc_linux-64=11.* - graphviz - ipython -- libcudf==24.10.*,>=0.0.0a0 -- libcugraphops==24.10.*,>=0.0.0a0 -- libraft-headers==24.10.*,>=0.0.0a0 -- libraft==24.10.*,>=0.0.0a0 -- librmm==24.10.*,>=0.0.0a0 +- libcudf==24.12.*,>=0.0.0a0 +- libcugraphops==24.12.*,>=0.0.0a0 +- libraft-headers==24.12.*,>=0.0.0a0 +- libraft==24.12.*,>=0.0.0a0 +- librmm==24.12.*,>=0.0.0a0 - nbsphinx -- nccl>=2.9.9 +- nccl>=2.19 - networkx>=2.5.1 - networkx>=3.0 - ninja - notebook>=0.5.0 - numba>=0.57 -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 +- ogb - openmpi -- packaging>=21 - pandas - pre-commit - pydantic - pydata-sphinx-theme -- pylibcugraphops==24.10.*,>=0.0.0a0 -- pylibraft==24.10.*,>=0.0.0a0 -- pylibwholegraph==24.10.*,>=0.0.0a0 +- pylibcugraphops==24.12.*,>=0.0.0a0 +- pylibraft==24.12.*,>=0.0.0a0 +- pylibwholegraph==24.12.*,>=0.0.0a0 - pytest - pytest-benchmark - pytest-cov - pytest-mpl - pytest-xdist - python-louvain -- pytorch>=2.0,<2.2.0a0 -- raft-dask==24.10.*,>=0.0.0a0 +- pytorch>=2.3,<2.4.0a0 +- raft-dask==24.12.*,>=0.0.0a0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - recommonmark - requests -- rmm==24.10.*,>=0.0.0a0 -- scikit-build-core>=0.7.0 +- rmm==24.12.*,>=0.0.0a0 +- scikit-build-core>=0.10.0 - scikit-learn>=0.23.1 - scipy - setuptools>=61.0.0 @@ -74,8 +74,9 @@ dependencies: - sphinxcontrib-websupport - thriftpy2!=0.5.0,!=0.5.1 - torchdata +- torchmetrics - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 - wget - wheel name: 
all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index ebded3eec92..eca10584304 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -18,58 +18,58 @@ dependencies: - cuda-nvtx-dev - cuda-profiler-api - cuda-version=12.5 -- cudf==24.10.*,>=0.0.0a0 +- cudf==24.12.*,>=0.0.0a0 - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.10.*,>=0.0.0a0 -- dask-cudf==24.10.*,>=0.0.0a0 +- dask-cuda==24.12.*,>=0.0.0a0 +- dask-cudf==24.12.*,>=0.0.0a0 - doxygen - fsspec>=0.6.0 - gcc_linux-64=11.* - graphviz - ipython - libcublas-dev -- libcudf==24.10.*,>=0.0.0a0 -- libcugraphops==24.10.*,>=0.0.0a0 +- libcudf==24.12.*,>=0.0.0a0 +- libcugraphops==24.12.*,>=0.0.0a0 - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==24.10.*,>=0.0.0a0 -- libraft==24.10.*,>=0.0.0a0 -- librmm==24.10.*,>=0.0.0a0 +- libraft-headers==24.12.*,>=0.0.0a0 +- libraft==24.12.*,>=0.0.0a0 +- librmm==24.12.*,>=0.0.0a0 - nbsphinx -- nccl>=2.9.9 +- nccl>=2.19 - networkx>=2.5.1 - networkx>=3.0 - ninja - notebook>=0.5.0 - numba>=0.57 -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc +- ogb - openmpi -- packaging>=21 - pandas - pre-commit - pydantic - pydata-sphinx-theme -- pylibcugraphops==24.10.*,>=0.0.0a0 -- pylibraft==24.10.*,>=0.0.0a0 -- pylibwholegraph==24.10.*,>=0.0.0a0 +- pylibcugraphops==24.12.*,>=0.0.0a0 +- pylibraft==24.12.*,>=0.0.0a0 +- pylibwholegraph==24.12.*,>=0.0.0a0 - pytest - pytest-benchmark - pytest-cov - pytest-mpl - pytest-xdist - python-louvain -- pytorch>=2.0,<2.2.0a0 -- raft-dask==24.10.*,>=0.0.0a0 +- pytorch>=2.3,<2.4.0a0 +- raft-dask==24.12.*,>=0.0.0a0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 -- rapids-dask-dependency==24.10.*,>=0.0.0a0 +- rapids-dask-dependency==24.12.*,>=0.0.0a0 - recommonmark - requests -- rmm==24.10.*,>=0.0.0a0 -- scikit-build-core>=0.7.0 +- rmm==24.12.*,>=0.0.0a0 +- scikit-build-core>=0.10.0 - scikit-learn>=0.23.1 - scipy - setuptools>=61.0.0 @@ -79,8 +79,9 @@ dependencies: - sphinxcontrib-websupport - thriftpy2!=0.5.0,!=0.5.1 - torchdata +- torchmetrics - ucx-proc=*=gpu -- ucx-py==0.40.*,>=0.0.0a0 +- ucx-py==0.41.*,>=0.0.0a0 - wget - wheel name: all_cuda-125_arch-x86_64 diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index d1cf6fcd9e9..c80ca6890a8 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -27,11 +27,11 @@ requirements: - cugraph ={{ version }} - dgl >=1.1.0.cu* - numba >=0.57 - - numpy >=1.23,<2.0a0 + - numpy >=1.23,<3.0a0 - pylibcugraphops ={{ minor_version }} - tensordict >=0.1.2 - python - - pytorch >=2.0 + - pytorch >=2.3,<2.4.0a0 - cupy >=12.0.0 tests: diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 2e1788ac0c6..38d4a3d7d15 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -29,9 +29,9 @@ requirements: run: - rapids-dask-dependency ={{ minor_version }} - numba >=0.57 - - numpy >=1.23,<2.0a0 + - numpy >=1.23,<3.0a0 - python - - pytorch >=2.0 + - pytorch >=2.3,<2.4.0a0 - cupy >=12.0.0 - cugraph ={{ version }} - pylibcugraphops ={{ minor_version }} diff --git a/conda/recipes/cugraph-service/conda_build_config.yaml b/conda/recipes/cugraph-service/conda_build_config.yaml index 2ac251ab10a..67ed3e26b0e 100644 --- a/conda/recipes/cugraph-service/conda_build_config.yaml +++ b/conda/recipes/cugraph-service/conda_build_config.yaml @@ -1,2 
+1,2 @@ ucx_py_version: - - "0.40.*" + - "0.41.*" diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml index c1027582c78..7df7573e2d0 100644 --- a/conda/recipes/cugraph-service/meta.yaml +++ b/conda/recipes/cugraph-service/meta.yaml @@ -63,7 +63,7 @@ outputs: - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - numba >=0.57 - - numpy >=1.23,<2.0a0 + - numpy >=1.23,<3.0a0 - python - rapids-dask-dependency ={{ minor_version }} - thriftpy2 >=0.4.15,!=0.5.0,!=0.5.1 diff --git a/conda/recipes/cugraph/conda_build_config.yaml b/conda/recipes/cugraph/conda_build_config.yaml index 2525441f92d..10f2e15c550 100644 --- a/conda/recipes/cugraph/conda_build_config.yaml +++ b/conda/recipes/cugraph/conda_build_config.yaml @@ -20,4 +20,4 @@ c_stdlib_version: - "2.17" ucx_py_version: - - "0.40.*" + - "0.41.*" diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index fccc3a208d6..9f5a137faba 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -68,7 +68,7 @@ requirements: - python - raft-dask ={{ minor_version }} - rmm ={{ minor_version }} - - scikit-build-core >=0.7.0 + - scikit-build-core >=0.10.0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 run: - aiohttp diff --git a/conda/recipes/libcugraph/conda_build_config.yaml b/conda/recipes/libcugraph/conda_build_config.yaml index 26aa428d7f5..55bd635c330 100644 --- a/conda/recipes/libcugraph/conda_build_config.yaml +++ b/conda/recipes/libcugraph/conda_build_config.yaml @@ -17,7 +17,7 @@ doxygen_version: - ">=1.8.11" nccl_version: - - ">=2.9.9" + - ">=2.19" c_stdlib: - sysroot diff --git a/conda/recipes/nx-cugraph/meta.yaml b/conda/recipes/nx-cugraph/meta.yaml index d67287be757..263f53d9a8f 100644 --- a/conda/recipes/nx-cugraph/meta.yaml +++ b/conda/recipes/nx-cugraph/meta.yaml @@ -14,9 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - build: - number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} requirements: host: diff --git a/conda/recipes/pylibcugraph/conda_build_config.yaml b/conda/recipes/pylibcugraph/conda_build_config.yaml index 2525441f92d..10f2e15c550 100644 --- a/conda/recipes/pylibcugraph/conda_build_config.yaml +++ b/conda/recipes/pylibcugraph/conda_build_config.yaml @@ -20,4 +20,4 @@ c_stdlib_version: - "2.17" ucx_py_version: - - "0.40.*" + - "0.41.*" diff --git a/conda/recipes/pylibcugraph/meta.yaml b/conda/recipes/pylibcugraph/meta.yaml index 15632cfcc0e..54d29a68d91 100644 --- a/conda/recipes/pylibcugraph/meta.yaml +++ b/conda/recipes/pylibcugraph/meta.yaml @@ -65,7 +65,7 @@ requirements: - libcugraph ={{ version }} - pylibraft ={{ minor_version }} - python - - scikit-build-core >=0.7.0 + - scikit-build-core >=0.10.0 - rapids-build-backend>=0.3.1,<0.4.0.dev0 run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 441627cabce..b8eaba9d575 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -332,8 +332,12 @@ set(CUGRAPH_SOURCES src/sampling/neighbor_sampling_sg_v32_e64.cpp src/sampling/neighbor_sampling_sg_v32_e32.cpp src/sampling/neighbor_sampling_sg_v64_e64.cpp - src/sampling/renumber_sampled_edgelist_sg_v64_e64.cu - src/sampling/renumber_sampled_edgelist_sg_v32_e32.cu + src/sampling/negative_sampling_sg_v32_e64.cu + src/sampling/negative_sampling_sg_v32_e32.cu + 
src/sampling/negative_sampling_sg_v64_e64.cu + src/sampling/negative_sampling_mg_v32_e64.cu + src/sampling/negative_sampling_mg_v32_e32.cu + src/sampling/negative_sampling_mg_v64_e64.cu src/sampling/sampling_post_processing_sg_v64_e64.cu src/sampling/sampling_post_processing_sg_v32_e32.cu src/sampling/sampling_post_processing_sg_v32_e64.cu @@ -483,6 +487,7 @@ set(CUGRAPH_SOURCES src/centrality/betweenness_centrality_mg_v32_e32.cu src/centrality/betweenness_centrality_mg_v32_e64.cu src/tree/legacy/mst.cu + src/from_cugraph_ops/sampling_index.cu src/components/weakly_connected_components_sg_v64_e64.cu src/components/weakly_connected_components_sg_v32_e32.cu src/components/weakly_connected_components_sg_v32_e64.cu @@ -656,6 +661,7 @@ add_library(cugraph_c src/c_api/louvain.cpp src/c_api/triangle_count.cpp src/c_api/neighbor_sampling.cpp + src/c_api/negative_sampling.cpp src/c_api/labeling_result.cpp src/c_api/weakly_connected_components.cpp src/c_api/strongly_connected_components.cpp diff --git a/cpp/examples/developers/graph_operations/graph_operations.cu b/cpp/examples/developers/graph_operations/graph_operations.cu index 014cedcab7e..912f9f1fd46 100644 --- a/cpp/examples/developers/graph_operations/graph_operations.cu +++ b/cpp/examples/developers/graph_operations/graph_operations.cu @@ -131,7 +131,7 @@ create_graph(raft::handle_t const& handle, // if (multi_gpu) { - std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore) = + std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore, std::ignore) = cugraph::shuffle_external_edges(handle, std::move(d_edge_srcs), std::move(d_edge_dsts), @@ -215,10 +215,10 @@ void perform_example_graph_operations( graph_view); cugraph::update_edge_src_property( - handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache); + handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache.mutable_view()); cugraph::update_edge_dst_property( - handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache); + handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache.mutable_view()); rmm::device_uvector weighted_averages( size_of_the_vertex_partition_assigned_to_this_process, handle.get_stream()); @@ -259,10 +259,10 @@ void perform_example_graph_operations( graph_view); cugraph::update_edge_src_property( - handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache); + handle, graph_view, vertex_weights.begin(), src_vertex_weights_cache.mutable_view()); cugraph::update_edge_dst_property( - handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache); + handle, graph_view, vertex_weights.begin(), dst_vertex_weights_cache.mutable_view()); rmm::device_uvector weighted_averages( size_of_the_vertex_partition_assigned_to_this_process, handle.get_stream()); diff --git a/cpp/examples/developers/vertex_and_edge_partition/vertex_and_edge_partition.cu b/cpp/examples/developers/vertex_and_edge_partition/vertex_and_edge_partition.cu index ce02e3b2639..c261ff6d843 100644 --- a/cpp/examples/developers/vertex_and_edge_partition/vertex_and_edge_partition.cu +++ b/cpp/examples/developers/vertex_and_edge_partition/vertex_and_edge_partition.cu @@ -127,7 +127,7 @@ create_graph(raft::handle_t const& handle, // if (multi_gpu) { - std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore) = + std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore, std::ignore) = cugraph::shuffle_external_edges(handle, std::move(d_edge_srcs), std::move(d_edge_dsts), diff --git 
a/cpp/examples/users/multi_gpu_application/mg_graph_algorithms.cpp b/cpp/examples/users/multi_gpu_application/mg_graph_algorithms.cpp index a9e2a170208..db629117604 100644 --- a/cpp/examples/users/multi_gpu_application/mg_graph_algorithms.cpp +++ b/cpp/examples/users/multi_gpu_application/mg_graph_algorithms.cpp @@ -123,7 +123,7 @@ create_graph(raft::handle_t const& handle, // if (multi_gpu) { - std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore) = + std::tie(d_edge_srcs, d_edge_dsts, d_edge_wgts, std::ignore, std::ignore, std::ignore) = cugraph::shuffle_external_edges(handle, std::move(d_edge_srcs), std::move(d_edge_dsts), @@ -248,9 +248,8 @@ void run_graph_algorithms( std::cout); } -int main(int argc, char** argv) +void run_tests() { - initialize_mpi_and_set_device(argc, argv); std::unique_ptr handle = initialize_mg_handle(); // @@ -279,6 +278,7 @@ int main(int argc, char** argv) std::move(std::make_optional(edge_wgts)), renumber, is_symmetric); + // Non-owning view of the graph object auto graph_view = graph.view(); @@ -292,5 +292,14 @@ int main(int argc, char** argv) run_graph_algorithms( *handle, graph_view, edge_weight_view); + handle.release(); +} + +int main(int argc, char** argv) +{ + initialize_mpi_and_set_device(argc, argv); + + run_tests(); + RAFT_MPI_TRY(MPI_Finalize()); } diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 8ba39fa2328..7e5af4ac686 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1579,11 +1579,11 @@ std:: template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed = std::numeric_limits::max()); + size_t max_length); /** * @brief returns biased random walks from starting sources, where each path is of given @@ -1623,11 +1623,11 @@ uniform_random_walks(raft::handle_t const& handle, template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed = std::numeric_limits::max()); + size_t max_length); /** * @brief returns biased random walks with node2vec biases from starting sources, @@ -1670,13 +1670,13 @@ biased_random_walks(raft::handle_t const& handle, template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, weight_t p, - weight_t q, - uint64_t seed = std::numeric_limits::max());
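The three random-walk signatures above replace the old trailing `uint64_t seed` parameter with a caller-owned `raft::random::RngState`, so repeated calls can draw from one reproducible random stream. A minimal sketch of the new calling convention follows; it is not part of the diff, and it assumes the template parameter order `(vertex_t, edge_t, weight_t, multi_gpu)` plus a single-GPU `graph_view` built elsewhere (the flattened diff elides template argument lists):

```cpp
// Hypothetical usage sketch (not from this PR): uniform random walks with
// the reworked API. Assumes vertex_t=int32_t, edge_t=int32_t, weight_t=float.
#include <cugraph/algorithms.hpp>
#include <raft/core/device_span.hpp>
#include <raft/core/handle.hpp>
#include <raft/random/rng_state.hpp>
#include <rmm/device_uvector.hpp>

void walk_example(raft::handle_t const& handle,
                  cugraph::graph_view_t<int32_t, int32_t, false, false> const& graph_view,
                  rmm::device_uvector<int32_t> const& start_vertices)
{
  raft::random::RngState rng_state{0};  // seeded once, reused across calls

  auto [vertex_paths, weight_paths] =
    cugraph::uniform_random_walks<int32_t, int32_t, float, false>(
      handle,
      rng_state,     // replaces the removed `uint64_t seed` parameter
      graph_view,
      std::nullopt,  // edge weights are optional for uniform walks
      raft::device_span<int32_t const>{start_vertices.data(), start_vertices.size()},
      size_t{4});    // max_length
}
```

`biased_random_walks` and `node2vec_random_walks` follow the same pattern: the RngState moves to the second argument and the defaulted seed disappears.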
#ifndef NO_CUGRAPH_OPS /** @@ -1684,6 +1684,8 @@ node2vec_random_walks(raft::handle_t const& handle, * list of vertices and sample size per vertex. The output graph consists of the given * vertices with each vertex having at most `sample_size` neighbors from the original graph * + * @deprecated This API is deprecated; use uniform_neighbor_sample instead. + * * @tparam graph_t Type of input graph/view (typically, graph_view_t, non-transposed and * single-gpu). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -1714,6 +1716,8 @@ sample_neighbors_adjacency_list(raft::handle_t const& handle, * list of vertices and sample size per vertex. The output graph consists of the given * vertices with each vertex having at most `sample_size` neighbors from the original graph * + * @deprecated This API is deprecated; use uniform_neighbor_sample instead. + * * @tparam graph_t Type of input graph/view (typically, graph_view_t, non-transposed and * single-gpu). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -1869,12 +1873,16 @@ void triangle_count(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. + * * @param do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). * * @return edge_property_t containing the edge triangle count */ template edge_property_t, edge_t> edge_triangle_count( - raft::handle_t const& handle, graph_view_t const& graph_view); + raft::handle_t const& handle, + graph_view_t const& graph_view, + bool do_expensive_check = false); /* * @brief Compute K-Truss. diff --git a/cpp/include/cugraph/detail/utility_wrappers.hpp b/cpp/include/cugraph/detail/utility_wrappers.hpp index 61ac1bd2804..3d99b85556b 100644 --- a/cpp/include/cugraph/detail/utility_wrappers.hpp +++ b/cpp/include/cugraph/detail/utility_wrappers.hpp @@ -87,6 +87,28 @@ void sequence_fill(rmm::cuda_stream_view const& stream_view, size_t size, value_t start_value); +/** + * @brief Fill a buffer with a sequence of values with the input stride + * + * Fills the buffer with the sequence with the input stride: + * {start_value, start_value+stride, start_value+stride*2, ..., start_value+stride*(size-1)} + * + * @tparam value_t type of the value to operate on + * + * @param[in] stream_view stream view + * @param[out] d_value device array to fill + * @param[in] size number of elements in array + * @param[in] start_value starting value for sequence + * @param[in] stride input stride + * + */ +template +void stride_fill(rmm::cuda_stream_view const& stream_view, + value_t* d_value, + size_t size, + value_t start_value, + value_t stride); + /** * @brief Compute the maximum vertex id of an edge list * diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index e1364f69991..866ab16ee97 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -988,63 +988,6 @@ rmm::device_uvector select_random_vertices( bool sort_vertices, bool do_expensive_check = false); -/** - * @brief renumber sampling output - * - * @deprecated This API will be deprecated and will be replaced by the - * renumber_and_compress_sampled_edgelist and renumber_and_sort_sampled_edgelist functions in - * sampling_functions.hpp. - * - * This function renumbers sampling function (e.g. uniform_neighbor_sample) outputs satisfying the - * following requirements. - * - * 1. If @p edgelist_hops is valid, we can consider (vertex ID, flag=src, hop) triplets for each - * vertex ID in @p edgelist_srcs and (vertex ID, flag=dst, hop) triplets for each vertex ID in @p - * edgelist_dsts. From these triplets, we can find the minimum (hop, flag) pairs for every unique - * vertex ID (hop is the primary key and flag is the secondary key, flag=src is considered smaller - * than flag=dst if hop numbers are same).
Vertex IDs with smaller (hop, flag) pairs precede vertex - * IDs with larger (hop, flag) pairs in renumbering. Ordering can be arbitrary among the vertices - * with the same (hop, flag) pairs. - * 2. If @p edgelist_hops is invalid, unique vertex IDs in @p edgelist_srcs precede vertex IDs that - * appear only in @p edgelist_dsts. - * 3. If label_offsets.has_value() is ture, edge lists for different labels will be renumbered - * separately. - * - * This function is single-GPU only (we are not aware of any practical multi-GPU use cases). - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam label_t Type of labels. Needs to be an integral type. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param edgelist_srcs A vector storing original edgelist source vertices. - * @param edgelist_dsts A vector storing original edgelist destination vertices (size = @p - * edgelist_srcs.size()). - * @param edgelist_hops An optional pointer to the array storing hops for each edge list (source, - * destination) pairs (size = @p edgelist_srcs.size() if valid). - * @param label_offsets An optional tuple of unique labels and the input edge list (@p - * edgelist_srcs, @p edgelist_hops, and @p edgelist_dsts) offsets for the labels (siez = # unique - * labels + 1). - * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return Tuple of vectors storing renumbered edge sources (size = @p edgelist_srcs.size()) , - * renumbered edge destinations (size = @p edgelist_dsts.size()), renumber_map to query original - * verties (size = # unique vertices or aggregate # unique vertices for every label), and - * renumber_map offsets (size = std::get<0>(*label_offsets).size() + 1, valid only if @p - * label_offsets.has_value() is true). 
- */ -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - std::optional>> -renumber_sampled_edgelist( - raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional> edgelist_hops, - std::optional, raft::device_span>> - label_offsets, - bool do_expensive_check = false); - /** * @brief Remove self loops from an edge list * @@ -1178,7 +1121,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& edge_srcs, rmm::device_uvector&& edge_dsts, diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index cbb52ef3b1e..a2ff3166fa4 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -636,7 +636,7 @@ class graph_view_t edge_srcs, raft::device_span edge_dsts, - bool do_expensive_check = false); + bool do_expensive_check = false) const; rmm::device_uvector compute_multiplicity( raft::handle_t const& handle, @@ -945,7 +945,7 @@ class graph_view_t has_edge(raft::handle_t const& handle, raft::device_span edge_srcs, raft::device_span edge_dsts, - bool do_expensive_check = false); + bool do_expensive_check = false) const; rmm::device_uvector compute_multiplicity(raft::handle_t const& handle, raft::device_span edge_srcs, diff --git a/cpp/include/cugraph/mtmg/instance_manager.hpp b/cpp/include/cugraph/mtmg/instance_manager.hpp index a2111804997..759635b4a34 100644 --- a/cpp/include/cugraph/mtmg/instance_manager.hpp +++ b/cpp/include/cugraph/mtmg/instance_manager.hpp @@ -20,6 +20,8 @@ #include +#include + #include namespace cugraph { diff --git a/cpp/include/cugraph/mtmg/resource_manager.hpp b/cpp/include/cugraph/mtmg/resource_manager.hpp index a9e4b81f894..e9d25c4576b 100644 --- a/cpp/include/cugraph/mtmg/resource_manager.hpp +++ b/cpp/include/cugraph/mtmg/resource_manager.hpp @@ -27,6 +27,8 @@ #include #include +#include + #include namespace cugraph { diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index fec1a07604e..783cd3a7e2b 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -476,12 +476,12 @@ renumber_and_sort_sampled_edgelist( * 1. If @p edgelist_hops is valid, we can consider (vertex ID, hop, flag=major) triplets for each * vertex ID in edge majors (@p edgelist_srcs if @p src_is_major is true, @p edgelist_dsts if false) * and (vertex ID, hop, flag=minor) triplets for each vertex ID in edge minors. From these triplets, - * we can find the minimum (hop, flag) pairs for every unique vertex ID (hop is the primary key and + * we can find the minimum (hop, flag) pair for every unique vertex ID (hop is the primary key and * flag is the secondary key, flag=major is considered smaller than flag=minor if hop numbers are * same). Vertex IDs with smaller (hop, flag) pairs precede vertex IDs with larger (hop, flag) pairs * in renumbering (if their vertex types are same, vertices with different types are renumbered * separately). Ordering can be arbitrary among the vertices with the same (vertex type, hop, flag) - * triplets. If @p seed_vertices.has-value() is true, we assume (hop=0, flag=major) for every vertex + * triplets. If @p seed_vertices.has_value() is true, we assume (hop=0, flag=major) for every vertex * in @p *seed_vertices in renumbering (this is relevant when there are seed vertices with no * neighbors). * 2. 
If @p edgelist_hops is invalid, unique vertex IDs in edge majors precede vertex IDs that @@ -495,11 +495,15 @@ renumber_and_sort_sampled_edgelist( * Edge IDs are renumbered fulfilling the following requirements (This is relevant only when @p * edgelist_edge_ids.has_value() is true). * - * 1. If @p edgelist_edge_types.has_value() is true, unique (edge type, edge ID) pairs are - * renumbered to consecutive integers starting from 0 for each edge type. If @p - * edgelist_edge_types.has_value() is true, unique edge IDs are renumbered to consecutive inetgers - * starting from 0. - * 2. If edgelist_label_offsets.has_value() is true, edge lists for different labels will be + * 1. If @p edgelist_hops is valid, we can consider (edge ID, hop) pairs. From these pairs, we can + * find the minimum hop value for every unique edge ID. Edge IDs with smaller hop values precede + * edge IDs with larger hop values in renumbering (if their edge types are same, edges with + * different edge types are renumbered separately). Ordering can be arbitrary among the edge IDs + * with the same (edge type, hop) pairs. + * 2. If @p edgelist_hops.has_value() is false, unique edge IDs (for each edge type if @p + * edgelist_edge_types.has_value() is true) are mapped to consecutive integers starting from 0. The + * ordering can be arbitrary. + * 3. If @p edgelist_label_offsets.has_value() is true, edge lists for different labels will be * renumbered separately. * * The renumbered edges are sorted based on the following rules. @@ -510,6 +514,11 @@ renumber_and_sort_sampled_edgelist( * true. * 2. Edges in each label are sorted independently if @p edgelist_label_offsets.has_value() is true. * + * This function assumes that there is a single edge source vertex type and a single edge + * destination vertex type for each edge. If @p edgelist_edge_types.has_value() is false (i.e. there + * is only one edge type), there should be only one edge source vertex type and only one edge + * destination vertex type; the source & destination vertex types may or may not coincide. + * * This function is single-GPU only (we are not aware of any practical multi-GPU use cases). * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. @@ -530,19 +539,16 @@ renumber_and_sort_sampled_edgelist( * edgelist_srcs.size() if valid). * @param edgelist_hops An optional vector storing edge list hop numbers (size = @p * edgelist_srcs.size() if valid). @p edgelist_hops should be valid if @p num_hops >= 2. - * @param edgelist_label_offsets An optional pointer to the array storing label offsets to the input - * edges (size = @p num_labels + 1). @p edgelist_label_offsets should be valid if @p num_labels - * >= 2. * @param seed_vertices An optional pointer to the array storing seed vertices in hop 0. * @param seed_vertex_label_offsets An optional pointer to the array storing label offsets to the * seed vertices (size = @p num_labels + 1). @p seed_vertex_label_offsets should be valid if @p * num_labels >= 2 and @p seed_vertices is valid and invalid otherwise. - * ext_vertices A pointer to the array storing external vertex IDs for the local internal vertices. - * The local internal vertex range can be obatined bgy invoking a graph_view_t object's - * local_vertex_partition_range() function. ext_vertex_type offsets A pointer to the array storing - * vertex type offsets for the entire external vertex ID range (array size = @p num_vertex_types + - * 1).
For example, if the array stores [0, 100, 200], external vertex IDs [0, 100) has vertex type - 0 and external vertex IDs [100, 200) has vertex type 1. + * @param edgelist_label_offsets An optional pointer to the array storing label offsets to the input + * edges (size = @p num_labels + 1). @p edgelist_label_offsets should be valid if @p num_labels + * >= 2. + * @param vertex_type_offsets A pointer to the array storing vertex type offsets for the entire + * vertex ID range (array size = @p num_vertex_types + 1). For example, if the array stores [0, 100, + * 200], vertex IDs [0, 100) has vertex type 0 and vertex IDs [100, 200) has vertex type 1. * @param num_labels Number of labels. Labels are considered if @p num_labels >=2 and ignored if @p * num_labels = 1. * @param num_hops Number of hops. Hop numbers are considered if @p num_hops >=2 and ignored if @p @@ -552,31 +558,36 @@ renumber_and_sort_sampled_edgelist( * @param src_is_major A flag to determine whether to use the source or destination as the * major key in renumbering and sorting. * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return Tuple of vectors storing edge sources, edge destinations, optional edge weights (valid - * only if @p edgelist_weights.has_value() is true), optional edge IDs (valid only if @p - * edgelist_edge_ids.has_value() is true), optional edge types (valid only if @p - * edgelist_edge_types.has_value() is true), optional (label, hop) offset values to the renumbered - * and sorted edges (size = @p num_labels * @p num_hops + 1, valid only when @p - * edgelist_hops.has_value() or @p edgelist_label_offsetes.has_value() is true), renumber_map to - * query original vertices (size = # unique or aggregate # unique vertices for each label), and - * label offsets to the renumber map (size = @p num_labels + 1, valid only if @p - * edgelist_label_offsets.has_value() is true). + * @return Tuple of vectors storing renumbered edge sources, renumbered edge destinations, optional + * edge weights (valid only if @p edgelist_weights.has_value() is true), optional renumbered edge + * IDs (valid only if @p edgelist_edge_ids.has_value() is true), optional (label, edge type, hop) + * offset values to the renumbered and sorted edges (size = @p num_labels * @p num_edge_types * @p + * num_hops + 1, valid only when @p edgelist_edge_types.has_value(), @p edgelist_hops.has_value(), + * or @p edgelist_label_offsets.has_value() is true), renumber_map to query original vertices (size + * = # unique or aggregate # unique vertices for each label), (label, vertex type) offsets to the + * vertex renumber map (size = @p num_labels * @p num_vertex_types + 1), optional renumber_map to + * query original edge IDs (size = # unique (edge_type, edge ID) pairs, valid only if @p + * edgelist_edge_ids.has_value() is true), and optional (label, edge type) offsets to the edge ID + * renumber map (size = @p num_labels * @p num_edge_types + 1, valid only if @p + * edgelist_edge_ids.has_value() is true). We do not explicitly return edge source & destination + * vertex types as we assume that source & destination vertex types are implicitly determined for a + * given edge type.
*/ template std::tuple< - rmm::device_uvector, // srcs - rmm::device_uvector, // dsts - std::optional>, // weights - std::optional>, // edge IDs - std::optional>, // edge types - std::optional>, // (label, edge type, hop) offsets to the edges - rmm::device_uvector, // vertex renumber map - std::optional>, // (label, type) offsets to the vertex renumber map + rmm::device_uvector, // srcs + rmm::device_uvector, // dsts + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // (label, edge type, hop) offsets to the edges + rmm::device_uvector, // vertex renumber map + rmm::device_uvector, // (label, vertex type) offsets to the vertex renumber map std::optional>, // edge ID renumber map - std::optional>> // (label, type) offsets to the edge ID renumber map + std::optional< + rmm::device_uvector>> // (label, edge type) offsets to the edge ID renumber map heterogeneous_renumber_and_sort_sampled_edgelist( raft::handle_t const& handle, rmm::device_uvector&& edgelist_srcs, @@ -585,11 +596,10 @@ heterogeneous_renumber_and_sort_sampled_edgelist( std::optional>&& edgelist_edge_ids, std::optional>&& edgelist_edge_types, std::optional>&& edgelist_hops, - std::optional> edgelist_label_offsets, std::optional> seed_vertices, std::optional> seed_vertex_label_offsets, - raft::device_span ext_vertices, - raft::device_span ext_vertex_type_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, size_t num_labels, size_t num_hops, size_t num_vertex_types, @@ -743,4 +753,61 @@ lookup_endpoints_from_edge_ids_and_types( raft::device_span edge_ids_to_lookup, raft::device_span edge_types_to_lookup); +/** + * @brief Negative Sampling + * + * This function generates negative samples for a graph. + * + * Negative sampling is done by generating a random graph according to the specified + * parameters and optionally removing samples that represent actual edges in the graph. + * + * Sampling occurs by creating a list of source vertex ids from biased sampling + * of the source vertex space, and destination vertex ids from biased sampling of the + * destination vertex space, and using this as the putative list of edges. We + * can then optionally remove duplicates and remove actual edges in the graph to generate + * the final list. If necessary we will repeat the process to end with a resulting + * edge list of the appropriate size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if + * true) are major indices + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object to generate negative samples for + * @param rng_state RNG state + * @param src_biases Optional bias for randomly selecting source vertices. If std::nullopt vertices + * will be selected uniformly. In multi-GPU environment the biases should be partitioned based + * on the vertex partitions. + * @param dst_biases Optional bias for randomly selecting destination vertices. If std::nullopt + * vertices will be selected uniformly. In multi-GPU environment the biases should be partitioned + * based on the vertex partitions. + * @param num_samples Number of negative samples to generate + * @param remove_duplicates If true, remove duplicate samples + * @param remove_existing_edges If true, remove samples that are actually edges in the graph + * @param exact_number_of_samples If true, repeat generation until we get the exact number of + * negative samples + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * + * @return tuple containing source vertex ids and destination vertex ids for the negative samples + */ +template +std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_biases, + std::optional> dst_biases, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + } // namespace cugraph
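To make the parameter list above concrete, here is a hedged single-GPU sketch; it is not part of the diff, and it assumes the template parameters elided by the flattened signature are `(vertex_t, edge_t, weight_t, store_transposed, multi_gpu)` and that a renumbered `graph_view` and a `raft::handle_t` already exist:

```cpp
// Hypothetical usage sketch (not from this PR): uniform negative sampling,
// i.e. no bias spans, requesting exactly 10,000 non-edges.
raft::random::RngState rng_state{42};

auto [neg_srcs, neg_dsts] =
  cugraph::negative_sampling<int32_t, int32_t, float, false, false>(
    handle,
    rng_state,
    graph_view,
    std::nullopt,    // src_biases: uniform over the source vertex space
    std::nullopt,    // dst_biases: uniform over the destination vertex space
    size_t{10'000},  // num_samples
    true,            // remove_duplicates
    true,            // remove_existing_edges
    true,            // exact_number_of_samples: resample until the count is met
    false);          // do_expensive_check
// Given the flags above, neg_srcs/neg_dsts hold de-duplicated (src, dst)
// pairs that do not appear as edges in the input graph.
```

Note the trade-off the flags encode: with `exact_number_of_samples` set, the implementation may repeat the sample-then-filter loop until `num_samples` survivors remain; without it, a single pass can return fewer than `num_samples` edges after filtering.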
diff --git a/cpp/include/cugraph_c/coo.h b/cpp/include/cugraph_c/coo.h new file mode 100644 index 00000000000..ef746c6ed6a --- /dev/null +++ b/cpp/include/cugraph_c/coo.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#ifdef __cplusplus extern "C" { +#endif + +/** + * @brief Opaque COO definition + */ +typedef struct { + int32_t align_; +} cugraph_coo_t; + +/** + * @brief Opaque COO list definition + */ +typedef struct { + int32_t align_; +} cugraph_coo_list_t; + +/** + * @brief Get the source vertex ids + * + * @param [in] coo Opaque pointer to COO + * @return type erased array view of source vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_coo_get_sources(cugraph_coo_t* coo); + +/** + * @brief Get the destination vertex ids + * + * @param [in] coo Opaque pointer to COO + * @return type erased array view of destination vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_coo_get_destinations(cugraph_coo_t* coo); + +/** + * @brief Get the edge weights + * + * @param [in] coo Opaque pointer to COO + * @return type erased array view of edge weights, NULL if no edge weights in COO + */ +cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_weights(cugraph_coo_t* coo); + +/** + * @brief Get the edge id + * + * @param [in] coo Opaque pointer to COO + * @return type erased array view of edge id, NULL if no edge ids in COO + */ +cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_id(cugraph_coo_t* coo); + +/** + * @brief Get the edge type + * + * @param [in] coo Opaque pointer to COO + * @return type erased array view of edge type, NULL if no edge types in COO + */ +cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_type(cugraph_coo_t* coo); + +/** + * @brief Get the number of COO objects in the list + * + * @param [in] coo_list Opaque pointer to COO list + * @return number of elements + */ +size_t cugraph_coo_list_size(const cugraph_coo_list_t* coo_list); + +/** + * @brief Get a COO
from the list + * + * @param [in] coo_list Opaque pointer to COO list + * @param [in] index Index of desired COO from list + * @return a cugraph_coo_t* object from the list + */ +cugraph_coo_t* cugraph_coo_list_element(cugraph_coo_list_t* coo_list, size_t index); + +/** + * @brief Free coo object + * + * @param [in] coo Opaque pointer to COO + */ +void cugraph_coo_free(cugraph_coo_t* coo); + +/** + * @brief Free coo list + * + * @param [in] coo_list Opaque pointer to list of COO objects + */ +void cugraph_coo_list_free(cugraph_coo_list_t* coo_list); + +#ifdef __cplusplus +} +#endif diff --git a/cpp/include/cugraph_c/graph_generators.h b/cpp/include/cugraph_c/graph_generators.h index 272131d2aab..553be530e95 100644 --- a/cpp/include/cugraph_c/graph_generators.h +++ b/cpp/include/cugraph_c/graph_generators.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -27,91 +28,6 @@ extern "C" { typedef enum { POWER_LAW = 0, UNIFORM } cugraph_generator_distribution_t; -/** - * @brief Opaque COO definition - */ -typedef struct { - int32_t align_; -} cugraph_coo_t; - -/** - * @brief Opaque COO list definition - */ -typedef struct { - int32_t align_; -} cugraph_coo_list_t; - -/** - * @brief Get the source vertex ids - * - * @param [in] coo Opaque pointer to COO - * @return type erased array view of source vertex ids - */ -cugraph_type_erased_device_array_view_t* cugraph_coo_get_sources(cugraph_coo_t* coo); - -/** - * @brief Get the destination vertex ids - * - * @param [in] coo Opaque pointer to COO - * @return type erased array view of destination vertex ids - */ -cugraph_type_erased_device_array_view_t* cugraph_coo_get_destinations(cugraph_coo_t* coo); - -/** - * @brief Get the edge weights - * - * @param [in] coo Opaque pointer to COO - * @return type erased array view of edge weights, NULL if no edge weights in COO - */ -cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_weights(cugraph_coo_t* coo); - -/** - * @brief Get the edge id - * - * @param [in] coo Opaque pointer to COO - * @return type erased array view of edge id, NULL if no edge ids in COO - */ -cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_id(cugraph_coo_t* coo); - -/** - * @brief Get the edge type - * - * @param [in] coo Opaque pointer to COO - * @return type erased array view of edge type, NULL if no edge types in COO - */ -cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_type(cugraph_coo_t* coo); - -/** - * @brief Get the number of coo object in the list - * - * @param [in] coo_list Opaque pointer to COO list - * @return number of elements - */ -size_t cugraph_coo_list_size(const cugraph_coo_list_t* coo_list); - -/** - * @brief Get a COO from the list - * - * @param [in] coo_list Opaque pointer to COO list - * @param [in] index Index of desired COO from list - * @return a cugraph_coo_t* object from the list - */ -cugraph_coo_t* cugraph_coo_list_element(cugraph_coo_list_t* coo_list, size_t index); - -/** - * @brief Free coo object - * - * @param [in] coo Opaque pointer to COO - */ -void cugraph_coo_free(cugraph_coo_t* coo); - -/** - * @brief Free coo list - * - * @param [in] coo_list Opaque pointer to list of COO objects - */ -void cugraph_coo_list_free(cugraph_coo_list_t* coo_list); - /** * @brief Generate RMAT edge list * diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 1a3d20b9339..bb26e577915 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ 
-16,6 +16,7 @@ #pragma once +#include #include #include #include @@ -674,6 +675,57 @@ cugraph_error_code_t cugraph_select_random_vertices(const cugraph_resource_handl cugraph_type_erased_device_array_t** vertices, cugraph_error_t** error); +/** + * @ingroup samplingC + * @brief Perform negative sampling + * + * Negative sampling generates a COO structure defining edges according to the specified parameters. + * + * @param [in] handle Handle for accessing resources + * @param [in,out] rng_state State of the random number generator, updated with each + * call + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex ids for the source biases. If @p src_biases and + * @p dst_biases are not specified this is ignored. If + * @p vertices is specified then vertices[i] is the vertex + * id of src_biases[i] and dst_biases[i]. If @p vertices + * is not specified then i is the vertex id of src_biases[i] + * and dst_biases[i] + * @param [in] src_biases Bias for selecting source vertices. If NULL, do uniform + * sampling, if provided probability of vertex i will be + * src_biases[i] / (sum of all source biases) + * @param [in] dst_biases Bias for selecting destination vertices. If NULL, do + * uniform sampling, if provided probability of vertex i + * will be dst_biases[i] / (sum of all destination biases) + * @param [in] num_samples Number of negative samples to generate + * @param [in] remove_duplicates If true, remove duplicates from sampled edges + * @param [in] remove_existing_edges If true, remove sampled edges that actually exist in + * the graph + * @param [in] exact_number_of_samples If true, result should contain exactly @p num_samples. If + * false the code will generate @p num_samples and then do + * any filtering as specified + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if + * set to true) + * @param [out] result Opaque pointer to the generated COO + * @param [out] error Pointer to an error object storing details of any error. + * Will be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_negative_sampling( + const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + const cugraph_type_erased_device_array_view_t* src_biases, + const cugraph_type_erased_device_array_view_t* dst_biases, + size_t num_samples, + bool_t remove_duplicates, + bool_t remove_existing_edges, + bool_t exact_number_of_samples, + bool_t do_expensive_check, + cugraph_coo_t** result, + cugraph_error_t** error); + #ifdef __cplusplus } #endif
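A matching sketch for this C entry point (written as C++ so it stays in one language with the other examples; error handling trimmed to the success path). `handle` and `graph` are assumed to have been created elsewhere; `cugraph_rng_state_create` and the type-erased array helpers are pre-existing cugraph_c APIs:

```cpp
// Hypothetical usage sketch (not from this PR): 1000 uniform negative samples.
cugraph_rng_state_t* rng_state{nullptr};
cugraph_coo_t* result{nullptr};
cugraph_error_t* error{nullptr};

cugraph_error_code_t status = cugraph_rng_state_create(handle, 42, &rng_state, &error);

// NULL vertices/src_biases/dst_biases selects uniform sampling.
status = cugraph_negative_sampling(handle,
                                   rng_state,
                                   graph,
                                   nullptr,  // vertices
                                   nullptr,  // src_biases
                                   nullptr,  // dst_biases
                                   1000,     // num_samples
                                   TRUE,     // remove_duplicates
                                   TRUE,     // remove_existing_edges
                                   FALSE,    // exact_number_of_samples
                                   FALSE,    // do_expensive_check
                                   &result,
                                   &error);

if (status == CUGRAPH_SUCCESS) {
  cugraph_type_erased_device_array_view_t* srcs = cugraph_coo_get_sources(result);
  cugraph_type_erased_device_array_view_t* dsts = cugraph_coo_get_destinations(result);
  size_t num_edges = cugraph_type_erased_device_array_view_size(srcs);
  // ... copy to host with cugraph_type_erased_device_array_view_copy_to_host(...)
  cugraph_coo_free(result);
}
```

The returned views follow the COO accessor contract above: the weight/id/type views are NULL here because negative sampling only populates the source and destination columns (see the `cugraph_coo_t` construction in negative_sampling.cpp below).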
diff --git a/cpp/src/c_api/coo.hpp b/cpp/src/c_api/coo.hpp new file mode 100644 index 00000000000..a83a3af375a --- /dev/null +++ b/cpp/src/c_api/coo.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_api/array.hpp" + +#include + +namespace cugraph { +namespace c_api { + +struct cugraph_coo_t { + std::unique_ptr src_{}; + std::unique_ptr dst_{}; + std::unique_ptr wgt_{}; + std::unique_ptr id_{}; + std::unique_ptr type_{}; +}; + +struct cugraph_coo_list_t { + std::vector> list_; +}; + +} // namespace c_api +} // namespace cugraph diff --git a/cpp/src/c_api/graph_generators.cpp b/cpp/src/c_api/graph_generators.cpp index ef478e57098..a58a4d5db35 100644 --- a/cpp/src/c_api/graph_generators.cpp +++ b/cpp/src/c_api/graph_generators.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ #include "c_api/array.hpp" +#include "c_api/coo.hpp" #include "c_api/error.hpp" #include "c_api/random.hpp" #include "c_api/resource_handle.hpp" @@ -26,24 +27,6 @@ #include -namespace cugraph { -namespace c_api { - -struct cugraph_coo_t { - std::unique_ptr src_{}; - std::unique_ptr dst_{}; - std::unique_ptr wgt_{}; - std::unique_ptr id_{}; - std::unique_ptr type_{}; -}; - -struct cugraph_coo_list_t { - std::vector> list_; -}; - -} // namespace c_api -} // namespace cugraph - namespace { template @@ -141,32 +124,41 @@ cugraph_error_code_t cugraph_generate_rmat_edgelists( extern "C" cugraph_type_erased_device_array_view_t* cugraph_coo_get_sources(cugraph_coo_t* coo) { auto internal_pointer = reinterpret_cast(coo); - return reinterpret_cast(internal_pointer->src_->view()); + return (internal_pointer->src_) ? reinterpret_cast( internal_pointer->src_->view()) : nullptr; } extern "C" cugraph_type_erased_device_array_view_t* cugraph_coo_get_destinations(cugraph_coo_t* coo) { auto internal_pointer = reinterpret_cast(coo); - return reinterpret_cast(internal_pointer->dst_->view()); + return (internal_pointer->dst_) ? reinterpret_cast( internal_pointer->dst_->view()) : nullptr; } extern "C" cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_weights(cugraph_coo_t* coo) { auto internal_pointer = reinterpret_cast(coo); - return reinterpret_cast(internal_pointer->wgt_->view()); + return (internal_pointer->wgt_) ? reinterpret_cast( internal_pointer->wgt_->view()) : nullptr; } extern "C" cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_id(cugraph_coo_t* coo) { auto internal_pointer = reinterpret_cast(coo); - return reinterpret_cast(internal_pointer->id_->view()); + return (internal_pointer->id_) ? reinterpret_cast( internal_pointer->id_->view()) : nullptr; } extern "C" cugraph_type_erased_device_array_view_t* cugraph_coo_get_edge_type(cugraph_coo_t* coo) { auto internal_pointer = reinterpret_cast(coo); - return reinterpret_cast( internal_pointer->type_->view()); + return (internal_pointer->type_) ? reinterpret_cast( internal_pointer->type_->view()) : nullptr; } extern "C" size_t cugraph_coo_list_size(const cugraph_coo_list_t* coo_list) diff --git a/cpp/src/c_api/negative_sampling.cpp b/cpp/src/c_api/negative_sampling.cpp new file mode 100644 index 00000000000..54f465d67b4 --- /dev/null +++ b/cpp/src/c_api/negative_sampling.cpp @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_api/abstract_functor.hpp" +#include "c_api/coo.hpp" +#include "c_api/graph.hpp" +#include "c_api/random.hpp" +#include "c_api/resource_handle.hpp" +#include "c_api/utils.hpp" + +#include + +#include +#include +#include +#include + +#include + +namespace { + +struct negative_sampling_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; + cugraph::c_api::cugraph_graph_t* graph_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* src_biases_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* dst_biases_{nullptr}; + size_t num_samples_; + bool remove_duplicates_{false}; + bool remove_existing_edges_{false}; + bool exact_number_of_samples_{false}; + bool do_expensive_check_{false}; + cugraph::c_api::cugraph_coo_t* result_{nullptr}; + + negative_sampling_functor(const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + const cugraph_type_erased_device_array_view_t* src_biases, + const cugraph_type_erased_device_array_view_t* dst_biases, + size_t num_samples, + bool_t remove_duplicates, + bool_t remove_existing_edges, + bool_t exact_number_of_samples, + bool_t do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + rng_state_(reinterpret_cast(rng_state)), + graph_(reinterpret_cast(graph)), + vertices_( + reinterpret_cast(vertices)), + src_biases_(reinterpret_cast( + src_biases)), + dst_biases_(reinterpret_cast( + dst_biases)), + num_samples_(num_samples), + remove_duplicates_(remove_duplicates), + remove_existing_edges_(remove_existing_edges), + exact_number_of_samples_(exact_number_of_samples), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + // FIXME: Think about how to handle SG vice MG + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + // negative_sampling expects store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>(graph_->graph_); + + auto graph_view = graph->view(); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + rmm::device_uvector vertices(0, handle_.get_stream()); + rmm::device_uvector src_biases(0, handle_.get_stream()); + rmm::device_uvector dst_biases(0, handle_.get_stream()); + + if (src_biases_ != nullptr) { + vertices.resize(vertices_->size_, handle_.get_stream()); + src_biases.resize(src_biases_->size_, handle_.get_stream()); + + raft::copy( + vertices.data(), vertices_->as_type(), vertices.size(), handle_.get_stream()); + raft::copy(src_biases.data(), + src_biases_->as_type(), + src_biases.size(), + handle_.get_stream()); + + src_biases = cugraph::detail:: + collect_local_vertex_values_from_ext_vertex_value_pairs( + handle_, + std::move(vertices), + std::move(src_biases), + *number_map, + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + weight_t{0}, + do_expensive_check_); + } + + if (dst_biases_ != nullptr) { + vertices.resize(vertices_->size_, handle_.get_stream()); + 
dst_biases.resize(dst_biases_->size_, handle_.get_stream()); + + raft::copy( + vertices.data(), vertices_->as_type(), vertices.size(), handle_.get_stream()); + raft::copy(dst_biases.data(), + dst_biases_->as_type(), + dst_biases.size(), + handle_.get_stream()); + + dst_biases = cugraph::detail:: + collect_local_vertex_values_from_ext_vertex_value_pairs( + handle_, + std::move(vertices), + std::move(dst_biases), + *number_map, + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + weight_t{0}, + do_expensive_check_); + } + + auto&& [src, dst] = cugraph::negative_sampling( + handle_, + rng_state_->rng_state_, + graph_view, + (src_biases_ != nullptr) ? std::make_optional(raft::device_span{ + src_biases.data(), src_biases.size()}) + : std::nullopt, + (dst_biases_ != nullptr) ? std::make_optional(raft::device_span{ + dst_biases.data(), dst_biases.size()}) + : std::nullopt, + num_samples_, + remove_duplicates_, + remove_existing_edges_, + exact_number_of_samples_, + do_expensive_check_); + + std::vector vertex_partition_lasts = graph_view.vertex_partition_range_lasts(); + + cugraph::unrenumber_int_vertices(handle_, + src.data(), + src.size(), + number_map->data(), + vertex_partition_lasts, + do_expensive_check_); + + cugraph::unrenumber_int_vertices(handle_, + dst.data(), + dst.size(), + number_map->data(), + vertex_partition_lasts, + do_expensive_check_); + + result_ = new cugraph::c_api::cugraph_coo_t{ + std::make_unique(src, + graph_->vertex_type_), + std::make_unique(dst, + graph_->vertex_type_), + nullptr, + nullptr, + nullptr}; + } + } +}; + +} // namespace + +cugraph_error_code_t cugraph_negative_sampling( + const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + const cugraph_type_erased_device_array_view_t* src_biases, + const cugraph_type_erased_device_array_view_t* dst_biases, + size_t num_samples, + bool_t remove_duplicates, + bool_t remove_existing_edges, + bool_t exact_number_of_samples, + bool_t do_expensive_check, + cugraph_coo_t** result, + cugraph_error_t** error) +{ + negative_sampling_functor functor{handle, + rng_state, + graph, + vertices, + src_biases, + dst_biases, + num_samples, + remove_duplicates, + remove_existing_edges, + exact_number_of_samples, + do_expensive_check}; + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index b9a2c8e4f60..705d2108437 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -16,6 +16,7 @@ #include "c_api/abstract_functor.hpp" #include "c_api/graph.hpp" +#include "c_api/random.hpp" #include "c_api/resource_handle.hpp" #include "c_api/utils.hpp" @@ -153,10 +154,11 @@ namespace { struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; + // FIXME: rng_state_ should be passed as a parameter + cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; size_t max_length_{0}; - size_t seed_{0}; cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr}; uniform_random_walks_functor(cugraph_resource_handle_t const* handle, @@ -222,13 +224,17 @@ struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor { graph_view.local_vertex_partition_range_last(), 
false); + // FIXME: remove once rng_state passed as parameter + rng_state_ = reinterpret_cast( + new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}}); + auto [paths, weights] = cugraph::uniform_random_walks( handle_, + rng_state_->rng_state_, graph_view, (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - max_length_, - seed_); + max_length_); // // Need to unrenumber the vertices in the resulting paths @@ -255,11 +261,12 @@ struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor { struct biased_random_walks_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; + // FIXME: rng_state_ should be passed as a parameter + cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; size_t max_length_{0}; cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr}; - uint64_t seed_{0}; biased_random_walks_functor(cugraph_resource_handle_t const* handle, cugraph_graph_t* graph, @@ -326,13 +333,17 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor { graph_view.local_vertex_partition_range_last(), false); + // FIXME: remove once rng_state passed as parameter + rng_state_ = reinterpret_cast( + new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}}); + auto [paths, weights] = cugraph::biased_random_walks( handle_, + rng_state_->rng_state_, graph_view, edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - max_length_, - seed_); + max_length_); // // Need to unrenumber the vertices in the resulting paths @@ -354,12 +365,13 @@ struct biased_random_walks_functor : public cugraph::c_api::abstract_functor { struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; + // FIXME: rng_state_ should be passed as a parameter + cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; size_t max_length_{0}; double p_{0}; double q_{0}; - uint64_t seed_{0}; cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr}; node2vec_random_walks_functor(cugraph_resource_handle_t const* handle, @@ -431,15 +443,19 @@ struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor { graph_view.local_vertex_partition_range_last(), false); + // FIXME: remove once rng_state passed as parameter + rng_state_ = reinterpret_cast( + new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}}); + auto [paths, weights] = cugraph::node2vec_random_walks( handle_, + rng_state_->rng_state_, graph_view, (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, max_length_, static_cast(p_), - static_cast(q_), - seed_); + static_cast(q_)); // FIXME: Need to fix invalid_vtx issue here. We can't unrenumber max_vertex_id+1 // properly... 
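A note on the random-walks changes above: each functor now threads a raft::random::RngState through the core algorithm in place of the removed seed_ member, so repeated calls draw from one generator stream instead of re-seeding per call (the locally constructed state is a stopgap until rng_state is accepted as a parameter, per the FIXMEs). A minimal sketch of the resulting C++ call shape (illustrative only; it assumes an existing raft::handle_t named handle, a single-GPU graph_view built with store_transposed == false, a device buffer starts of int32_t start vertices, and the usual <vertex_t, edge_t, weight_t, multi_gpu> template parameter list):

#include <cugraph/algorithms.hpp>
#include <raft/core/handle.hpp>
#include <raft/random/rng_state.hpp>

raft::random::RngState rng_state{0};  // seeded once, reused across calls

auto [paths, weights] = cugraph::uniform_random_walks<int32_t, int32_t, float, false>(
  handle,
  rng_state,     // new parameter, replaces the removed seed argument
  graph_view,
  std::nullopt,  // unweighted: no edge-weight view
  raft::device_span<int32_t const>{starts.data(), starts.size()},
  size_t{4} /* max_length */);

The returned paths are then unrenumbered exactly as in the functor bodies above.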
diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 225687c4cf0..e3501065008 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -18,8 +18,8 @@ #include "detail/graph_partition_utils.cuh" #include "prims/edge_bucket.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" -#include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include #include @@ -124,7 +124,8 @@ struct extract_q_r { template edge_property_t, edge_t> edge_triangle_count_impl( raft::handle_t const& handle, - graph_view_t const& graph_view) + graph_view_t const& graph_view, + bool do_expensive_check) { using weight_t = float; rmm::device_uvector edgelist_srcs(0, handle.get_stream()); @@ -158,14 +159,11 @@ edge_property_t, edge_t> edge_t num_remaining_edges -= chunk_size; // Perform 'nbr_intersection' in chunks to reduce peak memory. auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first + prev_chunk_size, - edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - false /*FIXME: pass 'do_expensive_check' as argument*/); - + per_v_pair_dst_nbr_intersection(handle, + graph_view, + edge_first + prev_chunk_size, + edge_first + prev_chunk_size + chunk_size, + do_expensive_check); // Update the number of triangles of each (p, q) edges by looking at their intersection // size thrust::for_each( @@ -365,9 +363,11 @@ edge_property_t, edge_t> edge_t template edge_property_t, edge_t> edge_triangle_count( - raft::handle_t const& handle, graph_view_t const& graph_view) + raft::handle_t const& handle, + graph_view_t const& graph_view, + bool do_expensive_check) { - return detail::edge_triangle_count_impl(handle, graph_view); + return detail::edge_triangle_count_impl(handle, graph_view, do_expensive_check); } } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_mg_v32_e32.cu b/cpp/src/community/edge_triangle_count_mg_v32_e32.cu index 1212a13323b..5e333139ddf 100644 --- a/cpp/src/community/edge_triangle_count_mg_v32_e32.cu +++ b/cpp/src/community/edge_triangle_count_mg_v32_e32.cu @@ -20,6 +20,7 @@ namespace cugraph { // SG instantiation template edge_property_t, int32_t> edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view); + cugraph::graph_view_t const& graph_view, + bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_mg_v32_e64.cu b/cpp/src/community/edge_triangle_count_mg_v32_e64.cu index 64ee195c7ee..adab2d1fede 100644 --- a/cpp/src/community/edge_triangle_count_mg_v32_e64.cu +++ b/cpp/src/community/edge_triangle_count_mg_v32_e64.cu @@ -20,6 +20,7 @@ namespace cugraph { // SG instantiation template edge_property_t, int64_t> edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view); + cugraph::graph_view_t const& graph_view, + bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_mg_v64_e64.cu b/cpp/src/community/edge_triangle_count_mg_v64_e64.cu index 67c19e5ac52..1f321b2149f 100644 --- a/cpp/src/community/edge_triangle_count_mg_v64_e64.cu +++ b/cpp/src/community/edge_triangle_count_mg_v64_e64.cu @@ -20,6 +20,7 @@ namespace cugraph { // SG instantiation template edge_property_t, int64_t> edge_triangle_count( raft::handle_t const& handle, - 
cugraph::graph_view_t const& graph_view); + cugraph::graph_view_t const& graph_view, + bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_sg_v32_e32.cu b/cpp/src/community/edge_triangle_count_sg_v32_e32.cu index d6a215aa456..3e16a2cf7ef 100644 --- a/cpp/src/community/edge_triangle_count_sg_v32_e32.cu +++ b/cpp/src/community/edge_triangle_count_sg_v32_e32.cu @@ -20,6 +20,7 @@ namespace cugraph { // SG instantiation template edge_property_t, int32_t> edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view); + cugraph::graph_view_t const& graph_view, + bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_sg_v32_e64.cu b/cpp/src/community/edge_triangle_count_sg_v32_e64.cu index e70fa45c257..24a8de868e0 100644 --- a/cpp/src/community/edge_triangle_count_sg_v32_e64.cu +++ b/cpp/src/community/edge_triangle_count_sg_v32_e64.cu @@ -20,6 +20,7 @@ namespace cugraph { // SG instantiation template edge_property_t, int64_t> edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view); + cugraph::graph_view_t const& graph_view, + bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_sg_v64_e64.cu b/cpp/src/community/edge_triangle_count_sg_v64_e64.cu index 849603f781b..81f814df713 100644 --- a/cpp/src/community/edge_triangle_count_sg_v64_e64.cu +++ b/cpp/src/community/edge_triangle_count_sg_v64_e64.cu @@ -20,6 +20,7 @@ namespace cugraph { // SG instantiation template edge_property_t, int64_t> edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view); + cugraph::graph_view_t const& graph_view, + bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/community/egonet_impl.cuh b/cpp/src/community/egonet_impl.cuh index 8b942be5b6a..c7945831ba8 100644 --- a/cpp/src/community/egonet_impl.cuh +++ b/cpp/src/community/egonet_impl.cuh @@ -17,8 +17,6 @@ // #define TIMING -#include "utilities/graph_utils.cuh" - #include #include #include diff --git a/cpp/src/components/legacy/connectivity.cu b/cpp/src/components/legacy/connectivity.cu index ecaaab173db..4d0198fdff6 100644 --- a/cpp/src/components/legacy/connectivity.cu +++ b/cpp/src/components/legacy/connectivity.cu @@ -15,7 +15,6 @@ */ #include "scc_matrix.cuh" -#include "utilities/graph_utils.cuh" #include "weak_cc.cuh" #include diff --git a/cpp/src/detail/utility_wrappers_32.cu b/cpp/src/detail/utility_wrappers_32.cu index 6ab5ae375ca..de407f12493 100644 --- a/cpp/src/detail/utility_wrappers_32.cu +++ b/cpp/src/detail/utility_wrappers_32.cu @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "detail/utility_wrappers.cuh" +#include "detail/utility_wrappers_impl.cuh" #include #include @@ -68,6 +68,23 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view, size_t size, int32_t start_value); +template void sequence_fill(rmm::cuda_stream_view const& stream_view, + uint32_t* d_value, + size_t size, + uint32_t start_value); + +template void stride_fill(rmm::cuda_stream_view const& stream_view, + int32_t* d_value, + size_t size, + int32_t start_value, + int32_t stride); + +template void stride_fill(rmm::cuda_stream_view const& stream_view, + uint32_t* d_value, + size_t size, + uint32_t start_value, + uint32_t stride); + template int32_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, int32_t const* d_edgelist_srcs, int32_t const* d_edgelist_dsts, diff --git a/cpp/src/detail/utility_wrappers_64.cu b/cpp/src/detail/utility_wrappers_64.cu index a12bc3e952d..2c136d5902b 100644 --- a/cpp/src/detail/utility_wrappers_64.cu +++ b/cpp/src/detail/utility_wrappers_64.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "detail/utility_wrappers.cuh" +#include "detail/utility_wrappers_impl.cuh" #include #include @@ -71,6 +71,18 @@ template void sequence_fill(rmm::cuda_stream_view const& stream_view, size_t size, uint64_t start_value); +template void stride_fill(rmm::cuda_stream_view const& stream_view, + int64_t* d_value, + size_t size, + int64_t start_value, + int64_t stride); + +template void stride_fill(rmm::cuda_stream_view const& stream_view, + uint64_t* d_value, + size_t size, + uint64_t start_value, + uint64_t stride); + template int64_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, int64_t const* d_edgelist_srcs, int64_t const* d_edgelist_dsts, diff --git a/cpp/src/detail/utility_wrappers.cuh b/cpp/src/detail/utility_wrappers_impl.cuh similarity index 88% rename from cpp/src/detail/utility_wrappers.cuh rename to cpp/src/detail/utility_wrappers_impl.cuh index ce8549db9f8..074d7044261 100644 --- a/cpp/src/detail/utility_wrappers.cuh +++ b/cpp/src/detail/utility_wrappers_impl.cuh @@ -72,6 +72,22 @@ void sequence_fill(rmm::cuda_stream_view const& stream_view, thrust::sequence(rmm::exec_policy(stream_view), d_value, d_value + size, start_value); } +template <typename value_t> +void stride_fill(rmm::cuda_stream_view const& stream_view, + value_t* d_value, + size_t size, + value_t start_value, + value_t stride) +{ + thrust::transform(rmm::exec_policy(stream_view), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(size), + d_value, + cuda::proclaim_return_type<value_t>([start_value, stride] __device__(size_t i) { + return static_cast<value_t>(start_value + stride * i); + })); +} + template <typename vertex_t> vertex_t compute_maximum_vertex_id(rmm::cuda_stream_view const& stream_view, vertex_t const* d_edgelist_srcs, diff --git a/cpp/src/from_cugraph_ops/algo_R.cuh b/cpp/src/from_cugraph_ops/algo_R.cuh new file mode 100644 index 00000000000..031a7d2ceb9 --- /dev/null +++ b/cpp/src/from_cugraph_ops/algo_R.cuh @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws.
+ */ + +#pragma once + +#include "device.cuh" + +#include + +#include +#include +#include + +#include + +namespace cugraph::ops::graph { + +// single warp-separated field of type IdxT +template +using smem_algo_r_t = utils::smem_unit_simple_t<1, IdxT>; + +template +__device__ __forceinline__ void warp_algo_r_index(IdxT* smem, + IdxT pop_size, + IdxT idx_offset, + int sample_size, + raft::random::DeviceState& rng_state) +{ + auto lane = utils::lane_id(); + // first 'sample_size' are just copied + CUGRAPH_OPS_UNROLL + for (int i = lane; i < sample_size; i += utils::WARP_SIZE) { + smem[i] = idx_offset + i; + } + auto sample_size_idxt = IdxT{sample_size}; + if (sample_size_idxt >= pop_size) return; + + // we must synchronize here since we have just written to smem + utils::warp_sync(); + // TODO(mjoux): when we support more warps per node enable this + //__syncthreads(); + + auto idx_end = idx_offset + pop_size; + auto n = idx_offset + sample_size_idxt; + auto flat_id = uint64_t{threadIdx.x + blockIdx.x * blockDim.x}; + GenT gen(rng_state, flat_id); + CUGRAPH_OPS_UNROLL + for (auto nidx = n + IdxT{lane}; nidx < idx_end; nidx += IdxT{utils::WARP_SIZE}) { + // nidx - idx_offset inclusive (necessary for correctness of algo R) + auto end = nidx - idx_offset + 1; + raft::random::UniformIntDistParams int_params{}; + int_params.start = IdxT{0}; + int_params.end = IdxT{end}; + int_params.diff = static_cast(end); + IdxT idx; + raft::random::custom_next(gen, &idx, int_params, 0, 0 /* idx / stride unused */); + if (idx < sample_size_idxt) { + // using atomic max instead of exch here because it leads to the same + // output as the sequential algorithm (DGL does this, too) + // Additionally, we use the index instead of the neighbor ID here + // since this allows copying over other node/edge-related data + // (useful for heterogeneous graphs for example) + utils::atomic_max(smem + idx, nidx); + } + } + // must synchronize to make smem valid + utils::warp_sync(); + // TODO(mjoux): when we support more warps per node enable this + //__syncthreads(); +} + +template +__device__ __forceinline__ void warp_algo_r(IdxT* smem, + IdxT row_id, + const IdxT* nodes, + const IdxT* fg_offsets, + int sample_size, + IdxT& node_id, + IdxT& node_start, + IdxT& node_end, + raft::random::DeviceState& rng_state) +{ + auto lane = utils::lane_id(); + if (nodes == nullptr) { + node_id = row_id; + if (lane == 0) + node_start = fg_offsets[node_id]; + else if (lane == 1) + node_end = fg_offsets[node_id + 1]; + node_start = utils::shfl(node_start, 0); + node_end = utils::shfl(node_end, 1); + } else { + if (lane == 0) { + node_id = nodes[row_id]; + node_start = fg_offsets[node_id]; + node_end = fg_offsets[node_id + 1]; + } + node_id = utils::shfl(node_id, 0); + node_start = utils::shfl(node_start, 0); + node_end = utils::shfl(node_end, 0); + } + auto pop_size = node_end - node_start; + warp_algo_r_index(smem, pop_size, node_start, sample_size, rng_state); +} + +// TODO(mjoux): support configuring n_warps_per_node in template +template +CUGRAPH_OPS_KERNEL void algo_r_kernel(raft::random::DeviceState rng_state, + IdxT* neighbors, + IdxT* counts, + // edge_types / node_types should be non-const + // probably detected if `!IS_HG` + // NOLINTNEXTLINE(readability-non-const-parameter) + int32_t* edge_types, + // NOLINTNEXTLINE(readability-non-const-parameter) + int32_t* node_types, + const IdxT* offsets, + const IdxT* indices, + const int32_t* g_edge_types, + const int32_t* g_node_types, + const IdxT* nodes, + IdxT n_dst_nodes, + int 
sample_size) +{ + auto lane = utils::lane_id(); + auto warp = utils::warp_id(); // 1D block with X dim + auto row_id = warp + static_cast(blockIdx.x) * IdxT{N_WARPS}; + if (row_id >= n_dst_nodes) { return; } + IdxT* s_idx; + smem_algo_r_t smem{}; + int32_t smem_sizes[] = {sample_size}; + smem.set_ptrs(warp, N_WARPS, smem_sizes, s_idx); + IdxT node_id, node_start, node_end; + warp_algo_r( + s_idx, row_id, nodes, offsets, sample_size, node_id, node_start, node_end, rng_state); + + IdxT count = 0; + for (int i = lane; i < sample_size; i += utils::WARP_SIZE) { + auto nidx = s_idx[i]; + // checking for node_end here because sample_size may be larger than + // the total number of neighbors of the node + auto val = nidx < node_end ? indices[nidx] : cugraph::invalid_idx::value; + // TODO(mjoux) it's possible that we break the ELLPACK format here since + // if we set val to invalid, we should add it to end of list, rather + // than simply at index "i". This is ignored for now since the case + // where SAMPLE_SELF := false is rare and unconventional + if (!SAMPLE_SELF && val == node_id) val = cugraph::invalid_idx::value; + auto local_id = row_id * IdxT{sample_size} + i; + neighbors[local_id] = val; + if (val != cugraph::invalid_idx::value) { + ++count; + if (IS_HG) edge_types[local_id] = g_edge_types[nidx]; + } + } + if (IS_HG && lane == 0) node_types[row_id] = g_node_types[node_id]; + if (counts != nullptr) { + count = utils::warp_reduce(count); + if (lane == 0) { counts[row_id] = count; } + } +} + +template +void algo_r_impl(IdxT* neighbors, + IdxT* counts, + int32_t* edge_types, + int32_t* node_types, + raft::random::RngState& rng, + const IdxT* offsets, + const IdxT* indices, + const int32_t* g_edge_types, + const int32_t* g_node_types, + const IdxT* nodes, + IdxT n_dst_nodes, + IdxT g_n_dst_nodes, + IdxT sample_size, + IdxT max_val, + cudaStream_t stream) +{ + if (nodes == nullptr) { n_dst_nodes = g_n_dst_nodes; } + ASSERT(n_dst_nodes <= g_n_dst_nodes, + "Algo R: expected n_dst_nodes <= graph.n_dst_nodes (%ld > %ld)", + long(n_dst_nodes), + long(g_n_dst_nodes)); + ASSERT( + static_cast(sample_size) + 2 < static_cast(std::numeric_limits::max()), + "Expected sample size [+2] to be lower than INT_MAX"); + static constexpr int TPB = 512; + static constexpr int N_WARPS = TPB / utils::WARP_SIZE; + auto n_blks = utils::ceil_div(n_dst_nodes, N_WARPS); + int sample_size_i = static_cast(sample_size); + int32_t smem_sizes[] = {sample_size_i}; + size_t smem_size = smem_algo_r_t::get_size(N_WARPS, smem_sizes); + if (static_cast(max_val) < std::numeric_limits::max()) { + // we'll use the 32-bit based method for generating random integers + // as we most likely do not need less bias + RAFT_CALL_RNG_FUNC( + rng, + (algo_r_kernel<<>>), + neighbors, + counts, + edge_types, + node_types, + offsets, + indices, + g_edge_types, + g_node_types, + nodes, + n_dst_nodes, + sample_size_i); + } else { + RAFT_CALL_RNG_FUNC( + rng, + (algo_r_kernel<<>>), + neighbors, + counts, + edge_types, + node_types, + offsets, + indices, + g_edge_types, + g_node_types, + nodes, + n_dst_nodes, + sample_size_i); + } + // update the rng state (this is a pessimistic update as it is difficult to + // compute the number of RNG calls done per thread!) 
+ auto thread_rs = utils::ceil_div( + std::max(IdxT{0}, std::min(max_val, g_n_dst_nodes) - sample_size), utils::WARP_SIZE); + rng.advance(static_cast(n_blks * TPB), thread_rs); + RAFT_CUDA_TRY(cudaGetLastError()); +} + +} // namespace cugraph::ops::graph diff --git a/cpp/src/from_cugraph_ops/device.cuh b/cpp/src/from_cugraph_ops/device.cuh new file mode 100644 index 00000000000..f7d37c62f35 --- /dev/null +++ b/cpp/src/from_cugraph_ops/device.cuh @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +#include "device_atomics.cuh" +#include "device_core.hpp" +#include "device_dim.cuh" +#include "device_smem_helper.cuh" +#include "device_warp_collectives.cuh" +#include "macros.hpp" diff --git a/cpp/src/from_cugraph_ops/device_atomics.cuh b/cpp/src/from_cugraph_ops/device_atomics.cuh new file mode 100644 index 00000000000..b8be7614284 --- /dev/null +++ b/cpp/src/from_cugraph_ops/device_atomics.cuh @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +#include +#include + +#include + +namespace cugraph::ops::utils { + +/** + * @defgroup AtomicMax Device atomic max operation + * + * @{ + */ +template +__device__ inline DataT atomic_max(DataT* address, DataT val) +{ + return atomicMax(address, val); +} +template <> +__device__ inline float atomic_max(float* address, float val) +{ + using u32_t = unsigned int; + auto* address_as_u32 = reinterpret_cast(address); + u32_t old = *address_as_u32, assumed; + do { + assumed = old; + old = atomicCAS(address_as_u32, assumed, __float_as_uint(max(val, __uint_as_float(assumed)))); + } while (assumed != old); + return __uint_as_float(old); +} +template <> +__device__ inline double atomic_max(double* address, double val) +{ + using u64_t = unsigned long long; // NOLINT(google-runtime-int) + auto* address_as_ull = reinterpret_cast(address); + u64_t old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS( + address_as_ull, assumed, __double_as_longlong(max(val, __longlong_as_double(assumed)))); + } while (assumed != old); + return __longlong_as_double(old); +} +template <> +__device__ inline int64_t atomic_max(int64_t* address, int64_t val) +{ + using u64_t = unsigned long long; // NOLINT(google-runtime-int) + auto* val_as_u64 = reinterpret_cast(&val); + auto* address_as_u64 = reinterpret_cast(address); + auto ret = atomicMax(address_as_u64, *val_as_u64); + return *reinterpret_cast(&ret); +} +template <> +__device__ inline uint64_t atomic_max(uint64_t* address, uint64_t val) +{ + using u64_t = unsigned long long; // NOLINT(google-runtime-int) + auto* val_as_u64 = reinterpret_cast(&val); + auto* address_as_u64 = reinterpret_cast(address); + auto ret = atomicMax(address_as_u64, *val_as_u64); + return *reinterpret_cast(&ret); +} +/** @} */ + +} // namespace cugraph::ops::utils diff --git a/cpp/src/from_cugraph_ops/device_core.hpp b/cpp/src/from_cugraph_ops/device_core.hpp new file mode 100644 index 00000000000..b548d2d4d1f --- /dev/null +++ b/cpp/src/from_cugraph_ops/device_core.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. 
+ * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +#include "macros.hpp" + +namespace cugraph::ops::utils { + +/** number of threads per warp */ +static constexpr int WARP_SIZE = 32; + +/** minimum CUDA version required for warp shfl sync functions */ +static constexpr int CUDA_VER_WARP_SHFL = 9000; + +/** + * @brief Provide a ceiling division operation ie. ceil(a / b) + * + * @tparam IntT supposed to be only integers for now! + * + * @param[in] a dividend + * @param[in] b divisor + */ +template +constexpr CUGRAPH_OPS_HD IntT ceil_div(IntT a, IntT b) +{ + return (a + b - 1) / b; +} + +/** + * @brief Provide an alignment function ie. ceil(a / b) * b + * + * @tparam IntT supposed to be only integers for now! + * + * @param[in] a dividend + * @param[in] b divisor + */ +template +constexpr CUGRAPH_OPS_HD IntT align_to(IntT a, IntT b) +{ + return ceil_div(a, b) * b; +} + +} // namespace cugraph::ops::utils diff --git a/cpp/src/from_cugraph_ops/device_dim.cuh b/cpp/src/from_cugraph_ops/device_dim.cuh new file mode 100644 index 00000000000..275d0edd485 --- /dev/null +++ b/cpp/src/from_cugraph_ops/device_dim.cuh @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +#include "device_core.hpp" + +namespace cugraph::ops::utils { + +/** get the lane id of the current thread */ +__device__ __forceinline__ int lane_id() +{ + int id; + asm("mov.s32 %0, %%laneid;" : "=r"(id)); + return id; +} + +/** + * get the flat id of the current thread (within block) + * template parameters allow to control which CTA dimensions are used + */ +template +__device__ __forceinline__ int flat_id() +{ + if (!USE_X && !USE_Y && !USE_Z) + return 0; // weird case, but if we get here, we should have 1 thread + if (!USE_X && !USE_Y && USE_Z) return threadIdx.z; + if (!USE_X && USE_Y && !USE_Z) return threadIdx.y; + if (!USE_X && USE_Y && USE_Z) return threadIdx.y + threadIdx.z * blockDim.y; + if (USE_X && !USE_Y && !USE_Z) return threadIdx.x; + if (USE_X && !USE_Y && USE_Z) return threadIdx.x + threadIdx.z * blockDim.x; + if (USE_X && USE_Y && !USE_Z) return threadIdx.x + threadIdx.y * blockDim.x; + // USE_X && USE_Y && USE_Z + return threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.x * blockDim.y; +} + +/** + * get the number of warps of the current block + * template parameters allow to control which CTA dimensions are used + */ +template +__device__ __forceinline__ int num_warps() +{ + if (!USE_X && !USE_Y && !USE_Z) + return 1; // weird case, but if we get here, we should have 1 thread + if (!USE_X && !USE_Y && USE_Z) return ceil_div(blockDim.z, WARP_SIZE); + if (!USE_X && USE_Y && !USE_Z) return ceil_div(blockDim.y, WARP_SIZE); + if (!USE_X && USE_Y && USE_Z) return ceil_div(blockDim.y * blockDim.z, WARP_SIZE); + if (USE_X && !USE_Y && !USE_Z) return ceil_div(blockDim.x, WARP_SIZE); + if (USE_X && !USE_Y && USE_Z) return ceil_div(blockDim.x * blockDim.z, WARP_SIZE); + if (USE_X && USE_Y && !USE_Z) return ceil_div(blockDim.x * blockDim.y, WARP_SIZE); + // USE_X && USE_Y && USE_Z + return ceil_div(blockDim.x * blockDim.y * blockDim.z, WARP_SIZE); +} + +/** + * get the warp id of the current thread + * template parameters allow to 
control which CTA dimensions are used + * @note: this only makes sense if the first used dimension of the CTA size + * is a multiple of WARP_SIZE. If this is not the case, use + * `flat_id<...>() / WARP_SIZE` to get the warp id of the current thread + */ +template +__device__ __forceinline__ int warp_id() +{ + if (!USE_X && !USE_Y && !USE_Z) + return 0; // weird case, but if we get here, we should have 1 thread + if (!USE_X && !USE_Y && USE_Z) return threadIdx.z / WARP_SIZE; + if (!USE_X && USE_Y && !USE_Z) return threadIdx.y / WARP_SIZE; + if (!USE_X && USE_Y && USE_Z) + return threadIdx.y / WARP_SIZE + threadIdx.z * num_warps(); + if (USE_X && !USE_Y && !USE_Z) return threadIdx.x / WARP_SIZE; + if (USE_X && !USE_Y && USE_Z) + return threadIdx.x / WARP_SIZE + threadIdx.z * num_warps(); + if (USE_X && USE_Y && !USE_Z) + return threadIdx.x / WARP_SIZE + threadIdx.y * num_warps(); + // USE_X && USE_Y && USE_Z + return threadIdx.x / WARP_SIZE + threadIdx.y * num_warps() + + threadIdx.z * blockDim.y * num_warps(); +} + +/** + * get the block dimension of the current executing block + * template parameters allow to control which CTA dimensions are used + */ +template +__device__ __forceinline__ int block_dim() +{ + if (!USE_X && !USE_Y && !USE_Z) + return 1; // weird case, but if we get here, we should have 1 thread + if (!USE_X && !USE_Y && USE_Z) return blockDim.z; + if (!USE_X && USE_Y && !USE_Z) return blockDim.y; + if (!USE_X && USE_Y && USE_Z) return blockDim.y * blockDim.z; + if (USE_X && !USE_Y && !USE_Z) return blockDim.x; + if (USE_X && !USE_Y && USE_Z) return blockDim.x * blockDim.z; + if (USE_X && USE_Y && !USE_Z) return blockDim.x * blockDim.y; + // USE_X && USE_Y && USE_Z + return blockDim.x * blockDim.y * blockDim.z; +} + +/** + * get the flat id of the current thread (within device/grid) + * template parameters allow to control which grid and block/CTA dimensions are used + */ +template +__device__ __forceinline__ int flat_grid_id() +{ + auto b_id = flat_id(); + auto b_dim = block_dim(); + if (!G_USE_X && !G_USE_Y && !G_USE_Z) + return 0; // weird case, but if we get here, we should have 1 thread + if (!G_USE_X && !G_USE_Y && G_USE_Z) return blockIdx.z * b_dim + b_id; + if (!G_USE_X && G_USE_Y && !G_USE_Z) return blockIdx.y * b_dim + b_id; + if (!G_USE_X && G_USE_Y && G_USE_Z) return blockIdx.y * b_dim + blockIdx.z * blockDim.z + b_id; + if (G_USE_X && !G_USE_Y && !G_USE_Z) return blockIdx.x * b_dim + b_id; + if (G_USE_X && !G_USE_Y && G_USE_Z) return blockIdx.x * b_dim + blockIdx.z * blockDim.z + b_id; + if (G_USE_X && G_USE_Y && !G_USE_Z) return blockIdx.x * b_dim + blockIdx.y * blockDim.y + b_id; + // G_USE_X && G_USE_Y && G_USE_Z + return blockIdx.x * b_dim + blockIdx.y * blockDim.y * blockDim.z + blockIdx.z * blockDim.z + b_id; +} + +} // namespace cugraph::ops::utils diff --git a/cpp/src/from_cugraph_ops/device_smem_helper.cuh b/cpp/src/from_cugraph_ops/device_smem_helper.cuh new file mode 100644 index 00000000000..f1b5be071d9 --- /dev/null +++ b/cpp/src/from_cugraph_ops/device_smem_helper.cuh @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. 
+ */ + +#pragma once + +#include "device_core.hpp" + +#include +#include +#include + +namespace cugraph::ops::utils { + +// The following struct must be used to transmit the size and alignment of +// a field to the shared memory helpers below. +// By default, the alignment is just like the alignment of the original data type. +template <typename DataT, int32_t ALIGN = 0> +struct field_type { + using data_t = DataT; + static constexpr int32_t BYTES = static_cast<int32_t>(sizeof(DataT)); + static constexpr int32_t ALIGNMENT = ALIGN > 0 ? ALIGN : alignof(DataT); +}; + +// Imagine we have 2 fields of data in shared memory, one for ints, one for doubles. +// The intended usage of the following class in simple cases is as follows: +// 1. specify the type somewhere for both host and kernel code: +// using special_smem_name_t = smem_helper< 0, 0, field_type<int>, field_type<double> >; +// /* can be simplified to the following: */ +// using special_smem_name_t = smem_simple_t< int, double >; +// 2. in host code, get the size of shared memory: +// int32_t smem_sizes[] = {n_ints, n_doubles}; +// /* note: sizes are always in number of elements, not bytes */ +// /* sizes always have type `int32_t` */ +// auto size = special_smem_name_t::get_size(smem_sizes); +// 3. in device code, call the empty constructor: +// special_smem_name_t helper {}; +// int* s_ints; +// double* s_doubles; +// int32_t smem_sizes[] = {n_ints, n_doubles}; +// helper.set_ptrs(smem_sizes, s_ints, s_doubles); +// +// For more complicated use cases, it is often useful to create a struct overloading +// operator[] and passing that to the `get_size` or `set_ptrs` helpers. +// The struct can also be used to directly pass the size information from +// host code (launch) to the kernel, avoiding duplication of calculating sizes. +// Be aware that this overload must have a `__host__ __device__` signature. +// Here is an example struct for the above use case: +// struct sizes_t { +// int32_t n_ints, n_doubles; +// __host__ __device__ sizes_t() = delete; +// __host__ __device__ sizes_t(int32_t _n_ints, int32_t _n_doubles) : +// n_ints(_n_ints), n_doubles(_n_doubles) {} +// +// /* you may also just return int32_t here instead of const int32_t& */ +// __host__ __device__ const int32_t& operator[](int idx) const +// { +// return idx == 0 ? n_ints : n_doubles; +// } +// }; +// +// The ALIGN_INIT template parameter is important for correctness: +// By default (ALIGN_INIT=0), we assume that all alignments are powers of 2, +// and we set ALIGN_INIT to the max alignment of the fields. If you want more +// control, you can set it yourself, but we always assume that it is a multiple +// of all alignment values of the fields. +// +// The N_UNIT_FIELDS template parameter allows specifying sub-spaces +// for a given number of "units" (often warps) such that the first +// `N_UNIT_FIELDS` fields are reserved sub-spaces per unit. +// In this case, the `get_size` and `set_ptrs` methods are modified such that +// you have to specify the number of units, and for `set_ptrs` the unit ID +// as well. +// This is useful for reserving exclusive shared memory per warp, for example. +// Each unit (warp) will have its sub-space (containing the `N_UNIT_FIELDS` +// fields) aligned to the initial alignment as described above.
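Putting the usage steps from the comment above together, a toy kernel and launcher for the simple (non-unit) helper might look like the following sketch (toy_kernel, launch_toy, and the launch configuration are illustrative names, not part of this patch):

using toy_smem_t = cugraph::ops::utils::smem_simple_t<int, double>;

CUGRAPH_OPS_KERNEL void toy_kernel(int32_t n_ints, int32_t n_doubles)
{
  int* s_ints;
  double* s_doubles;
  int32_t smem_sizes[] = {n_ints, n_doubles};
  toy_smem_t helper{};  // attaches to this block's dynamic shared memory
  helper.set_ptrs(smem_sizes, s_ints, s_doubles);
  // ... use s_ints[0..n_ints) and s_doubles[0..n_doubles) ...
}

void launch_toy(int32_t n_ints, int32_t n_doubles, cudaStream_t stream)
{
  int32_t smem_sizes[] = {n_ints, n_doubles};
  auto smem_bytes = toy_smem_t::get_size(smem_sizes);  // element counts in, bytes out
  toy_kernel<<<1, 32, smem_bytes, stream>>>(n_ints, n_doubles);
}

The unit variant follows the same pattern: smem_unit_simple_t adds the unit count to get_size and a (unit_id, n_units) pair to set_ptrs, which is how algo_r_kernel above reserves one per-warp index buffer.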
+template +class smem_helper { + public: + static constexpr size_t N_ARGS = sizeof...(FieldsT); + + protected: + static_assert(N_ARGS > 0, "smem_helper: must have at least one field type"); + static_assert(N_UNIT_FIELDS >= 0, "smem_helper: #unit fields must be non-negative"); + static_assert(N_UNIT_FIELDS <= N_ARGS, + "smem_helper: #unit fields must be smaller than #field types"); + // following static assertion for FieldsT to not be scalar types is based on + // https://stackoverflow.com/a/28253503/4134127 + template + struct bool_pack; + template + using all_true_t = std::is_same, bool_pack>; + static_assert(all_true_t::value...>::value, + "smem_helper: the given field template types must be of type `field_type` and " + "cannot be scalars"); + + template + __host__ __device__ static constexpr typename std::enable_if<(IDX < N_ARGS), int32_t>::type + max_align() + { + using f_t = typename std::tuple_element>::type; + static_assert(f_t::ALIGNMENT > 0, "field alignments must be greater than 0"); + return max_align() > f_t::ALIGNMENT ? max_align() : f_t::ALIGNMENT; + } + template + __host__ __device__ static constexpr typename std::enable_if<(IDX >= N_ARGS), int32_t>::type + max_align() + { + return -1; + } + + // this is assumed to be a multiple of all alignments + static constexpr int32_t ALIGN_BASE = ALIGN_INIT > 0 ? ALIGN_INIT : max_align<0>(); + + // here we exploit that the base pointer must be aligned to 16 bytes. + // if 16 is a multiple of ALIGN_BASE, that means we don't have any overhead. + // if ALIGN_BASE is a multiple of 16, it means that we need at most + // ALIGN_BASE - 16 extra bytes, otherwise it's ALIGN_BASE - 1 + static constexpr int32_t SIZE_OVERHEAD = 16 % ALIGN_BASE == 0 ? 0 + : ALIGN_BASE % 16 == 0 ? ALIGN_BASE - 16 + : ALIGN_BASE - 1; + + public: + // cannot easily use "= default" here for host-only code + // NOLINTNEXTLINE(modernize-use-equals-default) + __host__ __device__ smem_helper() + { +#if defined(__CUDA_ARCH__) + // must be aligned to 16 bytes on all supported architectures + // (don't have a reference for this at the moment!) + extern __shared__ uint8_t smem[]; + // align manually to `ALIGN_BASE`: this avoids the `__align(X)__` attribute + // which can cause issues if this is used in the same compilation unit + // with different types / alignments. + // In any case, the compiler/hardware cannot do a better job at providing + // an aligned pointer than we can do manually. 
+ auto smem_aligned = align_to(reinterpret_cast(smem), uintptr_t(ALIGN_BASE)); + base_ptr_ = reinterpret_cast(smem_aligned); +#endif + } + + template + __host__ __device__ static inline typename std::enable_if<(N <= 0), int32_t>::type get_size( + const SizeT& sizes) + { + auto current_total = 0; // base pointer must be aligned to ALIGN_BASE + size_helper<1>(current_total, sizes); + return SIZE_OVERHEAD + current_total; + } + + template + __host__ __device__ static inline typename std::enable_if<(N > 0), int32_t>::type get_size( + const int32_t n_units, const SizeT& sizes) + { + auto current_total = 0; // base pointer must be aligned to all alignments + unit_size_helper<1>(current_total, sizes); + // since the unit size is aligned to ALIGN_BASE, every base pointer for + // each unit as well as the base pointer after all units is aligned to + // ALIGN_BASE: since that is a multiple of all alignments, we can safely + // continue adding the sizes afterwards + auto unit_size = align_to(current_total, ALIGN_BASE); + current_total = 0; // base pointer must be aligned to all alignments + size_helper(current_total, sizes); + return SIZE_OVERHEAD + unit_size * n_units + current_total; + } + + template + __device__ inline typename std::enable_if<(N <= 0)>::type set_ptrs( + const SizeT& sizes, typename FieldsT::data_t*&... ptrs) const + { + return ptrs_helper<1>(0, 0, 0, 0, sizes, ptrs...); + } + + template + __device__ inline typename std::enable_if<(N > 0)>::type set_ptrs( + const int32_t& unit_id, + const int32_t& n_units, + const SizeT& sizes, + typename FieldsT::data_t*&... ptrs) const + { + auto current_total = 0; // base pointer must be aligned to all alignments + unit_size_helper<1>(current_total, sizes); + // see explanation in `get_size` for what aligning to ALIGN_BASE means + auto unit_size = align_to(current_total, ALIGN_BASE); + return ptrs_helper<1>(0, unit_id, unit_size, n_units, sizes, ptrs...); + } + + protected: + template + __host__ __device__ static inline void single_size(int32_t& current_total, const SizeT& sizes) + { + using next_field_t = typename std::tuple_element<(NEXT < N_ARGS ? NEXT : N_ARGS - 1), + std::tuple>::type; + using this_field_t = typename std::tuple_element<(NEXT < N_ARGS ? NEXT - 1 : N_ARGS - 1), + std::tuple>::type; + static constexpr int32_t ALIGN = + NEXT == N_UNIT_FIELDS || NEXT >= N_ARGS ? 
1 : next_field_t::ALIGNMENT; + current_total = align_to(current_total + sizes[NEXT - 1] * this_field_t::BYTES, ALIGN); + } + + // parentheses in `enable_if` here are used to help the parser understand "<>" + template + __host__ __device__ static inline typename std::enable_if<(NEXT <= N_ARGS)>::type size_helper( + int32_t& current_total, const SizeT& sizes) + { + single_size(current_total, sizes); + size_helper(current_total, sizes); + } + template + __host__ __device__ static inline typename std::enable_if<(NEXT > N_ARGS)>::type size_helper( + int32_t& /* current_total */, const SizeT& /* sizes */) + { + } + + template + __host__ __device__ static inline typename std::enable_if<(NEXT <= N_UNIT_FIELDS)>::type + unit_size_helper(int32_t& current_total, const SizeT& sizes) + { + single_size(current_total, sizes); + unit_size_helper(current_total, sizes); + } + template + __host__ __device__ static inline typename std::enable_if<(NEXT > N_UNIT_FIELDS)>::type + unit_size_helper(int32_t& /* current_total */, const SizeT& /* sizes */) + { + } + + template + __device__ inline void ptrs_helper(const int32_t& /* offset */, + const int32_t& /* unit_id */, + const int32_t& /* unit_size */, + const int32_t& /* n_units */, + const SizeT& /* sizes */) const + { + } + template + __device__ inline void ptrs_helper(const int32_t& offset, + const int32_t& unit_id, + const int32_t& unit_size, + const int32_t& n_units, + const SizeT& sizes, + PtrT*& ptr, + PtrsT*&... ptrs) const + { + // see `get_size`: base_ptr_ + u_off is always aligned to all alignments + // (whether for each individual unit or after all units) + auto u_off = NEXT <= N_UNIT_FIELDS ? unit_id * unit_size : n_units * unit_size; + ptr = reinterpret_cast(base_ptr_ + (u_off + offset)); + int32_t next_offset = offset; + if (NEXT == N_UNIT_FIELDS) + next_offset = 0; // pointer after all unit fields is aligned to all alignments + else + single_size(next_offset, sizes); + ptrs_helper(next_offset, unit_id, unit_size, n_units, sizes, ptrs...); + } + + uint8_t* base_ptr_{nullptr}; +}; + +template +using smem_simple_t = smem_helper<0, 0, field_type...>; + +template +using smem_unit_simple_t = smem_helper<0, N_UNIT_FIELDS, field_type...>; + +} // namespace cugraph::ops::utils diff --git a/cpp/src/from_cugraph_ops/device_warp_collectives.cuh b/cpp/src/from_cugraph_ops/device_warp_collectives.cuh new file mode 100644 index 00000000000..198b3be2f12 --- /dev/null +++ b/cpp/src/from_cugraph_ops/device_warp_collectives.cuh @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +#include "device_core.hpp" +#include "device_dim.cuh" +#include "macros.hpp" + +#include + +namespace cugraph::ops::utils { + +/** + * @brief get a bit mask for the `n_threads` lowest threads of a warp + * + * @param[in] n_threads number of threads in the mask + * + * @return the bit mask + */ +__host__ __device__ constexpr uint32_t low_thread_mask(int n_threads) +{ + return n_threads >= WARP_SIZE ? 0xffffffffU : (1U << n_threads) - 1U; +} + +/** + * apply a warp-wide sync (useful from Volta+ archs) + * + * @tparam NP number of participating threads + * + * @note This works on Pascal and earlier archs as well, but all threads with + * lane id <= NP must enter this function together and in convergence. 
+ */ +template <int NP = WARP_SIZE> +__device__ inline void warp_sync() +{ + __syncwarp(low_thread_mask(NP)); +} + +/** + * @brief Shuffle the data inside a warp + * + * @tparam DataT the data type (currently assumed to be 4B) + * + * @param[in] val value to be shuffled + * @param[in] src_lane lane from where to shuffle + * @param[in] width lane width + * @param[in] mask mask of participating threads (Volta+) + * + * @return the shuffled data + */ +template <typename DataT> +__device__ inline DataT shfl(DataT val, + int src_lane, + int width = WARP_SIZE, + uint32_t mask = 0xffffffffU) +{ + static_assert(CUDART_VERSION >= CUDA_VER_WARP_SHFL, + "Expected CUDA >= 9 for warp synchronous shuffle"); + return __shfl_sync(mask, val, src_lane, width); +} + +/** + * @brief Warp-level sum reduction + * + * @tparam DataT data type + * @tparam NP number of participating threads. + * must be a power of 2 and at most warp size + * + * @param[in] val input value + * + * @return only lane 0 will contain the valid reduced result + * + * @note Why not cub? Because cub doesn't seem to allow working with arbitrary + * number of warps in a block. + * + * @note All threads with lane id <= NP must enter this function together + * + * TODO(mjoux) Expand this to support arbitrary reduction ops + */ +template <typename DataT, int NP = WARP_SIZE> +__device__ inline DataT warp_reduce(DataT val) +{ + static constexpr uint32_t MASK = low_thread_mask(NP); + CUGRAPH_OPS_UNROLL + for (int i = NP / 2; i > 0; i >>= 1) { + DataT tmp = shfl(val, lane_id() + i, NP, MASK); + val += tmp; + } + return val; +} + +} // namespace cugraph::ops::utils diff --git a/cpp/src/from_cugraph_ops/macros.hpp b/cpp/src/from_cugraph_ops/macros.hpp new file mode 100644 index 00000000000..0ff08af0b1a --- /dev/null +++ b/cpp/src/from_cugraph_ops/macros.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#define CUGRAPH_OPS_STRINGIFY_DETAIL(x) #x +#define CUGRAPH_OPS_STRINGIFY(x) CUGRAPH_OPS_STRINGIFY_DETAIL(x) + +#define CUGRAPH_OPS_UNROLL _Pragma("unroll") +#if defined(__clang__) && defined(__CUDA__) +// clang wants pragma unroll without parentheses +#define CUGRAPH_OPS_UNROLL_N(n) _Pragma(CUGRAPH_OPS_STRINGIFY(unroll n)) +#else +// nvcc / nvrtc want pragma unroll with parentheses +#define CUGRAPH_OPS_UNROLL_N(n) _Pragma(CUGRAPH_OPS_STRINGIFY(unroll(n))) +#endif + +#if defined(__clang__) +#define CUGRAPH_OPS_CONSTEXPR_D constexpr +#else +#define CUGRAPH_OPS_CONSTEXPR_D constexpr __device__ +#endif + +#if defined(__CUDACC__) || defined(__CUDA__) +#define CUGRAPH_OPS_HD __host__ __device__ +#else +#define CUGRAPH_OPS_HD +#endif + +// The CUGRAPH_OPS_KERNEL macro specifies that a kernel has hidden visibility +// +// cugraph-ops needs to ensure that its CUGRAPH_OPS_KERNEL function +// templates have hidden visibility (the default is weak visibility). +// +// When kernels have weak visibility, it means that if two dynamic libraries +// both contain identical instantiations of a kernel/template, then the linker +// will discard one of the two instantiations and use only one of them. +// +// Due to unique requirements of how CUDA works, this de-duplication +// can lead to the wrong kernels being called (SM version being wrong), +// silently no kernel being called at all, or CUDA runtime errors being +// thrown.
+// +// https://github.com/rapidsai/raft/issues/1722 +#ifndef CUGRAPH_OPS_KERNEL +#define CUGRAPH_OPS_KERNEL __global__ static +#endif diff --git a/cpp/src/from_cugraph_ops/sampling.hpp b/cpp/src/from_cugraph_ops/sampling.hpp new file mode 100644 index 00000000000..5663b8d9c03 --- /dev/null +++ b/cpp/src/from_cugraph_ops/sampling.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +// FIXME: This is only here for the prims... +// Need to look how Seunghwa fixed this in his PR +#include + +#include + +#include + +#include + +namespace cugraph::legacy::ops::graph { + +/** + * @brief Generate indexes given population sizes and a sample size, + * with or without replacement + * + * @param[out] index The (dense) index matrix. [on device] + * [dim = `n_sizes x sample_size`] + * In case `replace` is `false`, this may contain + * `ops::graph::INVALID_ID` + * if no index could be generated. + * @param[inout] rng RAFT RngState state object + * @param[in] sizes Input array of population sizes [on device] + * [len = `n_sizes`] + * @param[in] n_sizes number of sizes to sample from. + * @param[in] sample_size max number of indexes to be sampled per element + * in `sizes`. Assumed to be <= 384 at the moment. + * @param[in] replace If `true`, sample with replacement, otherwise + * without replacement. + * @param[in] stream cuda stream + * + @{ + */ +void get_sampling_index(int32_t* index, + raft::random::RngState& rng, + const int32_t* sizes, + int32_t n_sizes, + int32_t sample_size, + bool replace, + cudaStream_t stream); +void get_sampling_index(int64_t* index, + raft::random::RngState& rng, + const int64_t* sizes, + int64_t n_sizes, + int32_t sample_size, + bool replace, + cudaStream_t stream); + +} // namespace cugraph::legacy::ops::graph diff --git a/cpp/src/from_cugraph_ops/sampling_index.cu b/cpp/src/from_cugraph_ops/sampling_index.cu new file mode 100644 index 00000000000..fb1f4ac3f1e --- /dev/null +++ b/cpp/src/from_cugraph_ops/sampling_index.cu @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#include "sampling.hpp" +#include "sampling_index.cuh" + +namespace cugraph::legacy::ops::graph { + +void get_sampling_index(int32_t* index, + raft::random::RngState& rng, + const int32_t* sizes, + int32_t n_sizes, + int32_t sample_size, + bool replace, + cudaStream_t stream) +{ + get_sampling_index_impl(index, rng, sizes, n_sizes, sample_size, replace, stream); +} + +void get_sampling_index(int64_t* index, + raft::random::RngState& rng, + const int64_t* sizes, + int64_t n_sizes, + int32_t sample_size, + bool replace, + cudaStream_t stream) +{ + get_sampling_index_impl(index, rng, sizes, n_sizes, sample_size, replace, stream); +} + +} // namespace cugraph::legacy::ops::graph diff --git a/cpp/src/from_cugraph_ops/sampling_index.cuh b/cpp/src/from_cugraph_ops/sampling_index.cuh new file mode 100644 index 00000000000..9ac574315bb --- /dev/null +++ b/cpp/src/from_cugraph_ops/sampling_index.cuh @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. 
+ * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + */ + +#pragma once + +#include "algo_R.cuh" +#include "sampling.hpp" + +#include +#include + +#include + +namespace cugraph::legacy::ops::graph { + +namespace utils = cugraph::ops::utils; + +template +using smem_algo_r_t = utils::smem_unit_simple_t<1, IdxT>; + +template +CUGRAPH_OPS_KERNEL void index_replace_kernel(raft::random::DeviceState rng_state, + IdxT* index, + const IdxT* sizes, + IdxT n_sizes, + int sample_size) +{ + using rand_t = std::make_unsigned_t; + // a warp-wide implementation. + auto lane = cugraph::ops::utils::lane_id(); + auto warp = utils::warp_id(); // 1D block with X dim + auto n_warps = utils::num_warps(); // 1D block with X dim + auto row_id = warp + static_cast(blockIdx.x) * IdxT{n_warps}; + if (row_id >= n_sizes) return; + // 1. load population size (once per warp) + IdxT size = IdxT{0}; + if (lane == 0) size = sizes[row_id]; + + // 2. shuffle it to all threads in warp + size = utils::shfl(size, 0); + + // 3. check valid size: possible early-out + if (size <= 0) { + CUGRAPH_OPS_UNROLL + for (auto i = lane; i < sample_size; i += utils::WARP_SIZE) { + index[row_id * IdxT{sample_size} + IdxT{i}] = cugraph::invalid_idx::value; + } + return; + } + + // 4. every thread generates its indexes + auto flat_id = static_cast(threadIdx.x + blockIdx.x * blockDim.x); + GenT gen(rng_state, flat_id); + raft::random::UniformIntDistParams int_params{}; + int_params.start = IdxT{0}; + int_params.end = size; + int_params.diff = static_cast(size); + CUGRAPH_OPS_UNROLL + for (auto i = lane; i < sample_size; i += utils::WARP_SIZE) { + IdxT idx = IdxT{0}; + raft::random::custom_next(gen, &idx, int_params, 0, 0 /* idx / stride unused */); + + // 5. output index + index[row_id * IdxT{sample_size} + IdxT{i}] = idx; + } +} + +template +void get_sampling_index_replace(IdxT* index, + raft::random::RngState& rng, + const IdxT* sizes, + IdxT n_sizes, + int32_t sample_size, + cudaStream_t stream) +{ + // keep thread per block fairly low since we can expect sample_size < warp_size + // thus we want to have as many blocks as possible to increase parallelism + static constexpr int TPB = 128; + static constexpr int N_WARPS = TPB / utils::WARP_SIZE; + auto n_blks = utils::ceil_div(n_sizes, N_WARPS); + RAFT_CALL_RNG_FUNC( + rng, (index_replace_kernel<<>>), index, sizes, n_sizes, sample_size); + auto thread_rs = utils::ceil_div(IdxT{sample_size}, utils::WARP_SIZE); + rng.advance(static_cast(n_blks * TPB), thread_rs * sizeof(IdxT) / sizeof(int32_t)); + RAFT_CUDA_TRY(cudaGetLastError()); +} + +template +CUGRAPH_OPS_KERNEL void index_algo_r_kernel(raft::random::DeviceState rng_state, + IdxT* index, + const IdxT* sizes, + IdxT n_sizes, + int sample_size) +{ + using rand_t = std::make_unsigned_t; + // a warp-wide implementation. + auto lane = utils::lane_id(); + auto warp = utils::warp_id(); // 1D block with X dim + auto row_id = warp + static_cast(blockIdx.x) * IdxT{N_WARPS}; + if (row_id >= n_sizes) return; + IdxT* s_idx; + smem_algo_r_t smem{}; + int32_t smem_sizes[] = {sample_size}; + smem.set_ptrs(warp, N_WARPS, smem_sizes, s_idx); + // 1. load population size (once per warp) + IdxT size = IdxT{0}; + if (lane == 0) size = sizes[row_id]; + + // 2. shuffle it to all threads in warp + size = utils::shfl(size, 0); + + // 3. 
Get algo R indexes per warp + cugraph::ops::graph::warp_algo_r_index( + s_idx, size, IdxT{0}, sample_size, rng_state); + + CUGRAPH_OPS_UNROLL + for (auto i = lane; i < sample_size; i += utils::WARP_SIZE) { + // 4. output index + // still need to check if the index is actually valid + auto idx = s_idx[i]; + index[row_id * IdxT{sample_size} + IdxT{i}] = + idx >= size ? cugraph::invalid_idx::value : idx; + } +} + +template +void get_sampling_index_reservoir(IdxT* index, + raft::random::RngState& rng, + const IdxT* sizes, + IdxT n_sizes, + int32_t sample_size, + cudaStream_t stream) +{ + // same TPB as in algo R: increased SM occupancy is most important here + static constexpr int TPB = 512; + static constexpr int N_WARPS = TPB / utils::WARP_SIZE; + auto n_blks = utils::ceil_div(n_sizes, N_WARPS); + int32_t smem_sizes[] = {sample_size}; + size_t smem_size = smem_algo_r_t::get_size(N_WARPS, smem_sizes); + RAFT_CALL_RNG_FUNC(rng, + (index_algo_r_kernel<<>>), + index, + sizes, + n_sizes, + sample_size); + auto thread_rs = utils::ceil_div( + std::max(IdxT{0}, std::min(std::numeric_limits::max(), n_sizes) - IdxT{sample_size}), + utils::WARP_SIZE); + rng.advance(static_cast(n_blks * TPB), thread_rs * sizeof(IdxT) / sizeof(int32_t)); + RAFT_CUDA_TRY(cudaGetLastError()); +} + +template +void get_sampling_index_impl(IdxT* index, + raft::random::RngState& rng, + const IdxT* sizes, + IdxT n_sizes, + int32_t sample_size, + bool replace, + cudaStream_t stream) +{ + if (replace) { + get_sampling_index_replace(index, rng, sizes, n_sizes, sample_size, stream); + } else { + get_sampling_index_reservoir(index, rng, sizes, n_sizes, sample_size, stream); + } +} + +} // namespace cugraph::legacy::ops::graph diff --git a/cpp/src/generators/erdos_renyi_generator.cuh b/cpp/src/generators/erdos_renyi_generator.cuh index cd461ee1aa2..10573ddb0d0 100644 --- a/cpp/src/generators/erdos_renyi_generator.cuh +++ b/cpp/src/generators/erdos_renyi_generator.cuh @@ -40,6 +40,11 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, vertex_t base_vertex_id, uint64_t seed) { + // NOTE: + // https://networkx.org/documentation/stable/_modules/networkx/generators/random_graphs.html#fast_gnp_random_graph + // identifies a faster algorithm that I think would be very efficient on the GPU. I believe we + // could just compute lr/lp in that code for a batch of values, use prefix sums to generate edge + // ids and then convert the generated values to a batch of edges. CUGRAPH_EXPECTS(num_vertices < std::numeric_limits::max(), "Implementation cannot support specified value"); @@ -88,6 +93,11 @@ generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle, uint64_t seed) { CUGRAPH_FAIL("Not implemented"); + + // To implement: + // Use sampling function to select `m` unique edge ids from the + // (num_vertices ^ 2) possible edges. Convert these to vertex + // ids. 
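A hedged sketch of that outline (illustrative only, not PR code: it assumes `edge_ids` already holds `m` unique edge ids in `[0, num_vertices^2)`, which is exactly the sampling step the comment above leaves open):

// Convert unique edge ids into (src, dst) vertex pairs by div/mod.
rmm::device_uvector<vertex_t> srcs(edge_ids.size(), handle.get_stream());
rmm::device_uvector<vertex_t> dsts(edge_ids.size(), handle.get_stream());
thrust::transform(
  handle.get_thrust_policy(),
  edge_ids.begin(),
  edge_ids.end(),
  thrust::make_zip_iterator(srcs.begin(), dsts.begin()),
  [v = static_cast<size_t>(num_vertices)] __device__(size_t id) {
    return thrust::make_tuple(static_cast<vertex_t>(id / v), static_cast<vertex_t>(id % v));
  });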
} } // namespace cugraph diff --git a/cpp/src/layout/legacy/barnes_hut.cuh b/cpp/src/layout/legacy/barnes_hut.cuh index fa6d3816417..fdd57c7772d 100644 --- a/cpp/src/layout/legacy/barnes_hut.cuh +++ b/cpp/src/layout/legacy/barnes_hut.cuh @@ -19,7 +19,6 @@ #include "bh_kernels.cuh" #include "converters/legacy/COOtoCSR.cuh" #include "fa2_kernels.cuh" -#include "utilities/graph_utils.cuh" #include "utils.hpp" #include diff --git a/cpp/src/layout/legacy/fa2_kernels.cuh b/cpp/src/layout/legacy/fa2_kernels.cuh index 33e7841a380..195889eebfb 100644 --- a/cpp/src/layout/legacy/fa2_kernels.cuh +++ b/cpp/src/layout/legacy/fa2_kernels.cuh @@ -17,7 +17,9 @@ #pragma once #define restrict __restrict__ -#include "utilities/graph_utils.cuh" +// From old graph_utils.cuh +#define CUDA_MAX_BLOCKS 65535 +#define CUDA_MAX_KERNEL_THREADS 256 // kernel will launch at most 256 threads per block namespace cugraph { namespace detail { diff --git a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh index 43415ba6df4..a6a164d36c1 100644 --- a/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh +++ b/cpp/src/prims/detail/sample_and_compute_local_nbr_indices.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include "from_cugraph_ops/sampling.hpp" #include "prims/detail/partition_v_frontier.cuh" #include "prims/detail/transform_v_frontier_e.cuh" #include "prims/property_op_utils.cuh" @@ -33,9 +34,6 @@ #include #include -#ifndef NO_CUGRAPH_OPS -#include -#endif #include #include @@ -394,11 +392,11 @@ compute_unique_keys(raft::handle_t const& handle, cuda::proclaim_return_type( [unique_key_first = get_dataframe_buffer_begin(aggregate_local_frontier_unique_keys) + local_frontier_unique_key_displacements[i], - num_unique_keys = local_frontier_unique_key_sizes[i]] __device__(key_t key) { + unique_key_last = get_dataframe_buffer_begin(aggregate_local_frontier_unique_keys) + + local_frontier_unique_key_displacements[i] + + local_frontier_unique_key_sizes[i]] __device__(key_t key) { return static_cast(thrust::distance( - unique_key_first, - thrust::lower_bound( - thrust::seq, unique_key_first, unique_key_first + num_unique_keys, key))); + unique_key_first, thrust::find(thrust::seq, unique_key_first, unique_key_last, key))); })); } @@ -639,7 +637,7 @@ rmm::device_uvector compute_uniform_sampling_index_without_replacement( auto mid_partition_size = frontier_partition_offsets[2] - frontier_partition_offsets[1]; if (mid_partition_size > 0) { // FIXME: tmp_degrees & tmp_nbr_indices can be avoided if we customize - // cugraph::ops::get_sampling_index + // cugraph::legacy::ops::get_sampling_index rmm::device_uvector tmp_degrees(mid_partition_size, handle.get_stream()); rmm::device_uvector tmp_nbr_indices(mid_partition_size * K, handle.get_stream()); thrust::gather(handle.get_thrust_policy(), @@ -647,13 +645,13 @@ rmm::device_uvector compute_uniform_sampling_index_without_replacement( frontier_indices.begin() + frontier_partition_offsets[2], frontier_degrees.begin(), tmp_degrees.begin()); - cugraph::ops::graph::get_sampling_index(tmp_nbr_indices.data(), - rng_state, - tmp_degrees.data(), - mid_partition_size, - static_cast(K), - false, - handle.get_stream()); + cugraph::legacy::ops::graph::get_sampling_index(tmp_nbr_indices.data(), + rng_state, + tmp_degrees.data(), + mid_partition_size, + static_cast(K), + false, + handle.get_stream()); thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), @@ -736,7 +734,7 @@ 
rmm::device_uvector compute_uniform_sampling_index_without_replacement( } if (retry_segment_indices) { - cugraph::ops::graph::get_sampling_index( + cugraph::legacy::ops::graph::get_sampling_index( (*retry_nbr_indices).data(), rng_state, (*retry_degrees).begin(), @@ -752,7 +750,7 @@ rmm::device_uvector compute_uniform_sampling_index_without_replacement( segment_frontier_degree_first, segment_frontier_degree_first + num_segments, tmp_degrees.begin()); - cugraph::ops::graph::get_sampling_index( + cugraph::legacy::ops::graph::get_sampling_index( tmp_nbr_indices.data(), rng_state, tmp_degrees.data(), @@ -1626,13 +1624,13 @@ uniform_sample_and_compute_local_nbr_indices( if (with_replacement) { if (frontier_degrees.size() > 0) { nbr_indices.resize(frontier_degrees.size() * K, handle.get_stream()); - cugraph::ops::graph::get_sampling_index(nbr_indices.data(), - rng_state, - frontier_degrees.data(), - static_cast(frontier_degrees.size()), - static_cast(K), - with_replacement, - handle.get_stream()); + cugraph::legacy::ops::graph::get_sampling_index(nbr_indices.data(), + rng_state, + frontier_degrees.data(), + static_cast(frontier_degrees.size()), + static_cast(K), + with_replacement, + handle.get_stream()); frontier_degrees.resize(0, handle.get_stream()); frontier_degrees.shrink_to_fit(handle.get_stream()); } @@ -1761,8 +1759,7 @@ biased_sample_and_compute_local_nbr_indices( std::optional> key_indices{std::nullopt}; std::vector local_frontier_sample_offsets{}; if (with_replacement) { - // computet segmented inclusive sums (one segment per seed) - + // compute segmented inclusive sums (one segment per seed) auto unique_key_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), cuda::proclaim_return_type( @@ -2041,7 +2038,7 @@ biased_sample_and_compute_local_nbr_indices( zero_bias_frontier_indices.resize(zero_bias_count_inclusive_sums.back(), handle.get_stream()); zero_bias_frontier_indices.shrink_to_fit(handle.get_stream()); - zero_bias_local_nbr_indices.resize(frontier_indices.size(), handle.get_stream()); + zero_bias_local_nbr_indices.resize(zero_bias_frontier_indices.size(), handle.get_stream()); zero_bias_local_nbr_indices.shrink_to_fit(handle.get_stream()); std::vector zero_bias_counts(zero_bias_count_inclusive_sums.size()); std::adjacent_difference(zero_bias_count_inclusive_sums.begin(), diff --git a/cpp/src/prims/detail/transform_v_frontier_e.cuh b/cpp/src/prims/detail/transform_v_frontier_e.cuh index 7d8824849f0..5ebcddfe8da 100644 --- a/cpp/src/prims/detail/transform_v_frontier_e.cuh +++ b/cpp/src/prims/detail/transform_v_frontier_e.cuh @@ -209,9 +209,6 @@ __global__ static void transform_v_frontier_e_mid_degree( auto const lane_id = tid % raft::warp_size(); size_t idx = static_cast(tid / raft::warp_size()); - using WarpScan = cub::WarpScan; - __shared__ typename WarpScan::TempStorage temp_storage; - while (idx < static_cast(thrust::distance(edge_partition_frontier_key_index_first, edge_partition_frontier_key_index_last))) { auto key_idx = *(edge_partition_frontier_key_index_first + idx); @@ -224,16 +221,15 @@ __global__ static void transform_v_frontier_e_mid_degree( thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); auto this_key_value_first = value_first + edge_partition_frontier_local_degree_offsets[key_idx]; if (edge_partition_e_mask) { - // FIXME: it might be faster to update in warp-sync way - edge_t counter{0}; - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { - if 
((*edge_partition_e_mask).get(edge_offset + i)) { ++counter; } - } - edge_t offset_within_warp{}; - WarpScan(temp_storage).ExclusiveSum(counter, offset_within_warp); - counter = 0; - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { + auto rounded_up_local_degree = + ((static_cast(local_degree) + (raft::warp_size() - 1)) / raft::warp_size()) * + raft::warp_size(); + edge_t base_offset{0}; + for (edge_t i = lane_id; i < rounded_up_local_degree; i += raft::warp_size()) { + auto valid = (i < local_degree) && (*edge_partition_e_mask).get(edge_offset + i); + auto ballot = __ballot_sync(raft::warp_full_mask(), valid ? uint32_t{1} : uint32_t{0}); + if (valid) { + auto intra_warp_offset = __popc(ballot & ~(raft::warp_full_mask() << lane_id)); transform_v_frontier_e_update_buffer_element( edge_partition, key, @@ -244,9 +240,9 @@ __global__ static void transform_v_frontier_e_mid_degree( edge_partition_dst_value_input, edge_partition_e_value_input, e_op, - this_key_value_first + offset_within_warp + counter); - ++counter; + this_key_value_first + base_offset + intra_warp_offset); } + base_offset += __popc(ballot); } } else { for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { @@ -300,6 +296,7 @@ __global__ static void transform_v_frontier_e_high_degree( using BlockScan = cub::BlockScan; __shared__ typename BlockScan::TempStorage temp_storage; + __shared__ edge_t increment; while (idx < static_cast(thrust::distance(edge_partition_frontier_key_index_first, edge_partition_frontier_key_index_last))) { @@ -313,16 +310,16 @@ __global__ static void transform_v_frontier_e_high_degree( thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset); auto this_key_value_first = value_first + edge_partition_frontier_local_degree_offsets[key_idx]; if (edge_partition_e_mask) { - // FIXME: it might be faster to update in block-sync way - edge_t counter{0}; - for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { ++counter; } - } - edge_t offset_within_block{}; - BlockScan(temp_storage).ExclusiveSum(counter, offset_within_block); - counter = 0; - for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { - if ((*edge_partition_e_mask).get(edge_offset + i)) { + auto rounded_up_local_degree = + ((static_cast(local_degree) + (transform_v_frontier_e_kernel_block_size - 1)) / + transform_v_frontier_e_kernel_block_size) * + transform_v_frontier_e_kernel_block_size; + edge_t base_offset{0}; + for (size_t i = threadIdx.x; i < rounded_up_local_degree; i += blockDim.x) { + auto valid = (i < local_degree) && (*edge_partition_e_mask).get(edge_offset + i); + edge_t intra_block_offset{}; + BlockScan(temp_storage).ExclusiveSum(valid ? edge_t{1} : edge_t{0}, intra_block_offset); + if (valid) { transform_v_frontier_e_update_buffer_element( edge_partition, key, @@ -333,9 +330,13 @@ __global__ static void transform_v_frontier_e_high_degree( edge_partition_dst_value_input, edge_partition_e_value_input, e_op, - this_key_value_first + offset_within_block + counter); - ++counter; + this_key_value_first + base_offset + intra_block_offset); + } + if (threadIdx.x == transform_v_frontier_e_kernel_block_size - 1) { + increment = intra_block_offset + (valid ? 
edge_t{1} : edge_t{0}); } + __syncthreads(); + base_offset += increment; } } else { for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { diff --git a/cpp/src/prims/per_v_pair_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_dst_nbr_intersection.cuh new file mode 100644 index 00000000000..01c76e5085a --- /dev/null +++ b/cpp/src/prims/per_v_pair_dst_nbr_intersection.cuh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "prims/detail/nbr_intersection.cuh" + +#include + +#include + +#include + +namespace cugraph { + +/** + * @brief Iterate over each input vertex pair and returns the common destination neighbor list + * pair in a CSR-like format + * + * Iterate over every vertex pair; intersect destination neighbor lists of the two vertices in the + * pair and store the result in a CSR-like format + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexPairIterator Type of the iterator for input vertex pairs. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_pair_first Iterator pointing to the first (inclusive) input vertex pair. + * @param vertex_pair_last Iterator pointing to the last (exclusive) input vertex pair. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return std::tuple Tuple of intersection offsets and indices. + */ +template +std::tuple, rmm::device_uvector> +per_v_pair_dst_nbr_intersection(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexPairIterator vertex_pair_first, + VertexPairIterator vertex_pair_last, + bool do_expensive_check = false) +{ + static_assert(!GraphViewType::is_storage_transposed); + + return detail::nbr_intersection(handle, + graph_view, + cugraph::edge_dummy_property_t{}.view(), + vertex_pair_first, + vertex_pair_last, + std::array{true, true}, + do_expensive_check); +} + +} // namespace cugraph diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 015a9c683f1..9d0f711d106 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include "from_cugraph_ops/sampling.hpp" #include "prims/detail/sample_and_compute_local_nbr_indices.cuh" #include "prims/property_op_utils.cuh" @@ -30,9 +31,6 @@ #include #include -#ifndef NO_CUGRAPH_OPS -#include -#endif #include #include @@ -353,7 +351,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, uniform_sample_and_compute_local_nbr_indices( handle, graph_view, - (minor_comm_size > 1) ? get_dataframe_buffer_begin(*aggregate_local_frontier) + (minor_comm_size > 1) ? 
get_dataframe_buffer_cbegin(*aggregate_local_frontier) : frontier.begin(), local_frontier_displacements, local_frontier_sizes, @@ -365,7 +363,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, biased_sample_and_compute_local_nbr_indices( handle, graph_view, - (minor_comm_size > 1) ? get_dataframe_buffer_begin(*aggregate_local_frontier) + (minor_comm_size > 1) ? get_dataframe_buffer_cbegin(*aggregate_local_frontier) : frontier.begin(), edge_bias_src_value_input, edge_bias_dst_value_input, @@ -394,7 +392,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, graph_view.local_edge_partition_view(i)); auto edge_partition_frontier_key_first = - ((minor_comm_size > 1) ? get_dataframe_buffer_begin(*aggregate_local_frontier) + ((minor_comm_size > 1) ? get_dataframe_buffer_cbegin(*aggregate_local_frontier) : frontier.begin()) + local_frontier_displacements[i]; auto edge_partition_sample_local_nbr_index_first = diff --git a/cpp/src/sampling/negative_sampling_impl.cuh b/cpp/src/sampling/negative_sampling_impl.cuh new file mode 100644 index 00000000000..93bb03077bc --- /dev/null +++ b/cpp/src/sampling/negative_sampling_impl.cuh @@ -0,0 +1,417 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "prims/reduce_v.cuh" +#include "prims/update_edge_src_dst_property.cuh" +#include "utilities/collect_comm.cuh" + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cugraph { + +namespace detail { + +template +std::tuple>, + std::optional>> +normalize_biases(raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::device_span biases) +{ + std::optional> normalized_biases{std::nullopt}; + std::optional> gpu_biases{std::nullopt}; + + // Need to normalize the biases + normalized_biases = + std::make_optional>(biases.size(), handle.get_stream()); + + weight_t sum = + thrust::reduce(handle.get_thrust_policy(), biases.begin(), biases.end(), weight_t{0}); + + thrust::transform(handle.get_thrust_policy(), + biases.begin(), + biases.end(), + normalized_biases->begin(), + divider_t{sum}); + + thrust::inclusive_scan(handle.get_thrust_policy(), + normalized_biases->begin(), + normalized_biases->end(), + normalized_biases->begin()); + + if constexpr (multi_gpu) { + rmm::device_scalar d_sum(sum, handle.get_stream()); + + gpu_biases = cugraph::device_allgatherv( + handle, handle.get_comms(), raft::device_span{d_sum.data(), d_sum.size()}); + + weight_t aggregate_sum = thrust::reduce( + handle.get_thrust_policy(), gpu_biases->begin(), gpu_biases->end(), weight_t{0}); + + // FIXME: https://github.com/rapidsai/raft/issues/2400 results in the possibility + // that 1 can appear as a random floating point value. We're going to use + // thrust::upper_bound to assign random values to GPUs, we need the value 1.0 to + // be part of the upper-most range. 
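(Worked example, illustrative only: with three GPUs and cumulative gpu_biases = {0.25, 0.60, 1.00}, a draw of r = 0.70 falls in the third range and r = 0.20 in the first, since thrust::upper_bound returns the first cumulative value greater than r; a stray r == 1.0 would fall past the end of the array, hence the padding described next.)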
We'll compute the last non-zero value in the + // gpu_biases array here and below we will fill it with a value larger than 1.0 + size_t trailing_zeros = thrust::distance( + thrust::make_reverse_iterator(gpu_biases->end()), + thrust::find_if(handle.get_thrust_policy(), + thrust::make_reverse_iterator(gpu_biases->end()), + thrust::make_reverse_iterator(gpu_biases->begin()), + [] __device__(weight_t bias) { return bias > weight_t{0}; })); + + thrust::transform(handle.get_thrust_policy(), + gpu_biases->begin(), + gpu_biases->end(), + gpu_biases->begin(), + divider_t{aggregate_sum}); + + thrust::inclusive_scan( + handle.get_thrust_policy(), gpu_biases->begin(), gpu_biases->end(), gpu_biases->begin()); + + // FIXME: conclusion of above. Using 1.1 since it is > 1.0 and easy to type + thrust::copy_n(handle.get_thrust_policy(), + thrust::make_constant_iterator(1.1), + trailing_zeros + 1, + gpu_biases->begin() + gpu_biases->size() - trailing_zeros - 1); + } + + return std::make_tuple(std::move(normalized_biases), std::move(gpu_biases)); +} + +template +rmm::device_uvector create_local_samples( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> const& normalized_biases, + std::optional> const& gpu_biases, + size_t samples_in_this_batch) +{ + rmm::device_uvector samples(0, handle.get_stream()); + + if (normalized_biases) { + size_t samples_to_generate{samples_in_this_batch}; + std::vector sample_count_from_each_gpu; + + rmm::device_uvector position(0, handle.get_stream()); + + if constexpr (multi_gpu) { + // Determine how many vertices are generated on each GPU + auto const comm_size = handle.get_comms().get_size(); + auto const comm_rank = handle.get_comms().get_rank(); + + sample_count_from_each_gpu.resize(comm_size); + + rmm::device_uvector gpu_counts(comm_size, handle.get_stream()); + position.resize(samples_in_this_batch, handle.get_stream()); + + thrust::fill(handle.get_thrust_policy(), gpu_counts.begin(), gpu_counts.end(), size_t{0}); + thrust::sequence(handle.get_thrust_policy(), position.begin(), position.end()); + + rmm::device_uvector random_values(samples_in_this_batch, handle.get_stream()); + detail::uniform_random_fill(handle.get_stream(), + random_values.data(), + random_values.size(), + weight_t{0}, + weight_t{1}, + rng_state); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(random_values.begin(), position.begin()), + thrust::make_zip_iterator(random_values.end(), position.end())); + + thrust::upper_bound(handle.get_thrust_policy(), + random_values.begin(), + random_values.end(), + gpu_biases->begin(), + gpu_biases->end(), + gpu_counts.begin()); + + thrust::adjacent_difference( + handle.get_thrust_policy(), gpu_counts.begin(), gpu_counts.end(), gpu_counts.begin()); + + std::vector tx_counts(gpu_counts.size()); + std::fill(tx_counts.begin(), tx_counts.end(), size_t{1}); + + rmm::device_uvector d_sample_count_from_each_gpu(0, handle.get_stream()); + + std::tie(d_sample_count_from_each_gpu, std::ignore) = + shuffle_values(handle.get_comms(), gpu_counts.begin(), tx_counts, handle.get_stream()); + + samples_to_generate = thrust::reduce(handle.get_thrust_policy(), + d_sample_count_from_each_gpu.begin(), + d_sample_count_from_each_gpu.end(), + size_t{0}); + + raft::update_host(sample_count_from_each_gpu.data(), + d_sample_count_from_each_gpu.data(), + d_sample_count_from_each_gpu.size(), + handle.get_stream()); + } + + // Generate samples + // FIXME: We could save this memory if we had an iterator 
that + // generated random values. + rmm::device_uvector random_values(samples_to_generate, handle.get_stream()); + samples.resize(samples_to_generate, handle.get_stream()); + detail::uniform_random_fill(handle.get_stream(), + random_values.data(), + random_values.size(), + weight_t{0}, + weight_t{1}, + rng_state); + + thrust::transform( + handle.get_thrust_policy(), + random_values.begin(), + random_values.end(), + samples.begin(), + [biases = + raft::device_span{normalized_biases->data(), normalized_biases->size()}, + offset = graph_view.local_vertex_partition_range_first()] __device__(weight_t r) { + size_t result = + offset + + static_cast(thrust::distance( + biases.begin(), thrust::lower_bound(thrust::seq, biases.begin(), biases.end(), r))); + + // FIXME: https://github.com/rapidsai/raft/issues/2400 + // results in the possibility that 1 can appear as a + // random floating point value, which results in the sampling + // algorithm below generating a value that's OOB. + if (result == (offset + biases.size())) --result; + + return result; + }); + + // Shuffle them back + if constexpr (multi_gpu) { + std::tie(samples, std::ignore) = shuffle_values( + handle.get_comms(), samples.begin(), sample_count_from_each_gpu, handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(position.begin(), samples.begin()), + thrust::make_zip_iterator(position.end(), samples.begin())); + } + } else { + samples.resize(samples_in_this_batch, handle.get_stream()); + + // Uniformly select a vertex from any GPU + detail::uniform_random_fill(handle.get_stream(), + samples.data(), + samples.size(), + vertex_t{0}, + graph_view.number_of_vertices(), + rng_state); + } + + return samples; +} + +} // namespace detail + +template +std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_biases, + std::optional> dst_biases, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check) +{ + rmm::device_uvector src(0, handle.get_stream()); + rmm::device_uvector dst(0, handle.get_stream()); + + // Optimistically assume we can do this in one pass + size_t samples_in_this_batch = num_samples; + + // Normalize the biases and (for MG) determine how the biases are + // distributed across the GPUs. + std::optional> normalized_src_biases{std::nullopt}; + std::optional> gpu_src_biases{std::nullopt}; + std::optional> normalized_dst_biases{std::nullopt}; + std::optional> gpu_dst_biases{std::nullopt}; + + if (src_biases) + std::tie(normalized_src_biases, gpu_src_biases) = + detail::normalize_biases(handle, graph_view, *src_biases); + + if (dst_biases) + std::tie(normalized_dst_biases, gpu_dst_biases) = + detail::normalize_biases(handle, graph_view, *dst_biases); + + while (samples_in_this_batch > 0) { + if constexpr (multi_gpu) { + auto const comm_size = handle.get_comms().get_size(); + auto const comm_rank = handle.get_comms().get_rank(); + + samples_in_this_batch = + (samples_in_this_batch / static_cast(comm_size)) + + (static_cast(comm_rank) < (samples_in_this_batch % static_cast(comm_size)) + ? 
1 + : 0); + } + + auto batch_src = create_local_samples( + handle, rng_state, graph_view, normalized_src_biases, gpu_src_biases, samples_in_this_batch); + auto batch_dst = create_local_samples( + handle, rng_state, graph_view, normalized_dst_biases, gpu_dst_biases, samples_in_this_batch); + + if constexpr (multi_gpu) { + auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); + + std::tie(batch_src, batch_dst, std::ignore, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(batch_src), + std::move(batch_dst), + std::nullopt, + std::nullopt, + std::nullopt, + vertex_partition_range_lasts); + } + + if (remove_existing_edges) { + auto has_edge_flags = + graph_view.has_edge(handle, + raft::device_span{batch_src.data(), batch_src.size()}, + raft::device_span{batch_dst.data(), batch_dst.size()}, + do_expensive_check); + + auto begin_iter = thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()); + auto new_end = thrust::remove_if(handle.get_thrust_policy(), + begin_iter, + begin_iter + batch_src.size(), + has_edge_flags.begin(), + thrust::identity()); + + batch_src.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + batch_dst.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + } + + if (remove_duplicates) { + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), + thrust::make_zip_iterator(batch_src.end(), batch_dst.end())); + + auto new_end = thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), + thrust::make_zip_iterator(batch_src.end(), batch_dst.end())); + + size_t new_size = + thrust::distance(thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), new_end); + + if (src.size() > 0) { + rmm::device_uvector new_src(src.size() + new_size, handle.get_stream()); + rmm::device_uvector new_dst(dst.size() + new_size, handle.get_stream()); + + thrust::merge(handle.get_thrust_policy(), + thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), + new_end, + thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end()), + thrust::make_zip_iterator(new_src.begin(), new_dst.begin())); + + new_end = thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(new_src.begin(), new_dst.begin()), + thrust::make_zip_iterator(new_src.end(), new_dst.end())); + + new_size = + thrust::distance(thrust::make_zip_iterator(new_src.begin(), new_dst.begin()), new_end); + + src = std::move(new_src); + dst = std::move(new_dst); + } else { + src = std::move(batch_src); + dst = std::move(batch_dst); + } + + src.resize(new_size, handle.get_stream()); + dst.resize(new_size, handle.get_stream()); + } else if (src.size() > 0) { + size_t current_end = src.size(); + + src.resize(src.size() + batch_src.size(), handle.get_stream()); + dst.resize(dst.size() + batch_dst.size(), handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + thrust::make_zip_iterator(batch_src.begin(), batch_dst.begin()), + thrust::make_zip_iterator(batch_src.end(), batch_dst.end()), + thrust::make_zip_iterator(src.begin(), dst.begin()) + current_end); + } else { + src = std::move(batch_src); + dst = std::move(batch_dst); + } + + if (exact_number_of_samples) { + size_t current_sample_size = src.size(); + if constexpr (multi_gpu) { + current_sample_size = cugraph::host_scalar_allreduce( + handle.get_comms(), 
current_sample_size, raft::comms::op_t::SUM, handle.get_stream()); + } + + // FIXME: We could oversample and discard the unnecessary samples + // to reduce the number of iterations in the outer loop, but it seems like + // exact_number_of_samples is an edge case not worth optimizing for at this time. + samples_in_this_batch = num_samples - current_sample_size; + } else { + samples_in_this_batch = 0; + } + } + + src.shrink_to_fit(handle.get_stream()); + dst.shrink_to_fit(handle.get_stream()); + + return std::make_tuple(std::move(src), std::move(dst)); +} + +} // namespace cugraph diff --git a/cpp/src/sampling/negative_sampling_mg_v32_e32.cu b/cpp/src/sampling/negative_sampling_mg_v32_e32.cu new file mode 100644 index 00000000000..ce54d54d319 --- /dev/null +++ b/cpp/src/sampling/negative_sampling_mg_v32_e32.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "negative_sampling_impl.cuh" + +#include +#include + +namespace cugraph { + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/sampling/negative_sampling_mg_v32_e64.cu b/cpp/src/sampling/negative_sampling_mg_v32_e64.cu new file mode 100644 index 00000000000..af4c28c0f1a --- /dev/null +++ b/cpp/src/sampling/negative_sampling_mg_v32_e64.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "negative_sampling_impl.cuh" + +#include +#include + +namespace cugraph { + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/sampling/negative_sampling_mg_v64_e64.cu b/cpp/src/sampling/negative_sampling_mg_v64_e64.cu new file mode 100644 index 00000000000..c5691fb4644 --- /dev/null +++ b/cpp/src/sampling/negative_sampling_mg_v64_e64.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "negative_sampling_impl.cuh" + +#include +#include + +namespace cugraph { + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/sampling/negative_sampling_sg_v32_e32.cu b/cpp/src/sampling/negative_sampling_sg_v32_e32.cu new file mode 100644 index 00000000000..3712414e4ec --- /dev/null +++ b/cpp/src/sampling/negative_sampling_sg_v32_e32.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "negative_sampling_impl.cuh" + +#include +#include + +namespace cugraph { + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/sampling/negative_sampling_sg_v32_e64.cu b/cpp/src/sampling/negative_sampling_sg_v32_e64.cu new file mode 100644 index 00000000000..c66c31a4258 --- /dev/null +++ b/cpp/src/sampling/negative_sampling_sg_v32_e64.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "negative_sampling_impl.cuh" + +#include +#include + +namespace cugraph { + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/sampling/negative_sampling_sg_v64_e64.cu b/cpp/src/sampling/negative_sampling_sg_v64_e64.cu new file mode 100644 index 00000000000..e4fc50890e4 --- /dev/null +++ b/cpp/src/sampling/negative_sampling_sg_v64_e64.cu @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "negative_sampling_impl.cuh" + +#include +#include + +namespace cugraph { + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +template std::tuple, rmm::device_uvector> negative_sampling( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> src_bias, + std::optional> dst_bias, + size_t num_samples, + bool remove_duplicates, + bool remove_existing_edges, + bool exact_number_of_samples, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 3b0bc15df93..0b1d9dcdb56 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -18,8 +18,6 @@ // #pragma once -#include "utilities/graph_utils.cuh" - #include #include #include diff --git a/cpp/src/sampling/random_walks_impl.cuh b/cpp/src/sampling/random_walks_impl.cuh index d582893d756..6c10fc473f3 100644 --- a/cpp/src/sampling/random_walks_impl.cuh +++ b/cpp/src/sampling/random_walks_impl.cuh @@ -17,7 +17,10 @@ #pragma once #include "detail/graph_partition_utils.cuh" +#include "prims/detail/nbr_intersection.cuh" #include "prims/per_v_random_select_transform_outgoing_e.cuh" +#include "prims/property_op_utils.cuh" +#include "prims/update_edge_src_dst_property.cuh" #include "prims/vertex_frontier.cuh" #include @@ -25,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -46,13 +50,6 @@ namespace cugraph { namespace detail { -inline uint64_t get_current_time_nanoseconds() -{ - auto cur = std::chrono::steady_clock::now(); - return static_cast( - std::chrono::duration_cast(cur.time_since_epoch()).count()); -} - template struct sample_edges_op_t { template @@ -70,21 +67,129 @@ struct sample_edges_op_t { } }; +template +struct biased_random_walk_e_bias_op_t { + __device__ bias_t + operator()(vertex_t, vertex_t, bias_t src_out_weight_sum, thrust::nullopt_t, bias_t weight) const + { + return weight / src_out_weight_sum; + } +}; + +template +struct biased_sample_edges_op_t { + __device__ thrust::tuple operator()( + vertex_t, vertex_t dst, weight_t, thrust::nullopt_t, weight_t weight) const + { + return thrust::make_tuple(dst, weight); + } +}; + +template +struct node2vec_random_walk_e_bias_op_t { + bias_t p_{}; + bias_t q_{}; + raft::device_span intersection_offsets_{}; + raft::device_span intersection_indices_{}; + raft::device_span current_vertices_{}; + raft::device_span prev_vertices_{}; + + // Unweighted Bias Operator + template + __device__ std::enable_if_t, bias_t> operator()( + thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + thrust::nullopt_t) const + { + // Check tag (prev vert) for destination + if (dst == thrust::get<1>(tagged_src)) { return 1.0 / p_; } + // Search zipped vertices for tagged src + auto lower_itr = thrust::lower_bound( + thrust::seq, + thrust::make_zip_iterator(current_vertices_.begin(), prev_vertices_.begin()), + thrust::make_zip_iterator(current_vertices_.end(), prev_vertices_.end()), + tagged_src); + auto low_idx = thrust::distance( + thrust::make_zip_iterator(current_vertices_.begin(), prev_vertices_.begin()), lower_itr); + auto intersection_index_first = 
intersection_indices_.begin() + intersection_offsets_[low_idx]; + auto intersection_index_last = + intersection_indices_.begin() + intersection_offsets_[low_idx + 1]; + auto itr = + thrust::lower_bound(thrust::seq, intersection_index_first, intersection_index_last, dst); + return (itr != intersection_index_last && *itr == dst) ? 1.0 : 1.0 / q_; + } + + // Weighted Bias Operator + template + __device__ std::enable_if_t, bias_t> operator()( + thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + W) const + { + // Check tag (prev vert) for destination + if (dst == thrust::get<1>(tagged_src)) { return 1.0 / p_; } + // Search zipped vertices for tagged src + auto lower_itr = thrust::lower_bound( + thrust::seq, + thrust::make_zip_iterator(current_vertices_.begin(), prev_vertices_.begin()), + thrust::make_zip_iterator(current_vertices_.end(), prev_vertices_.end()), + tagged_src); + auto low_idx = thrust::distance( + thrust::make_zip_iterator(current_vertices_.begin(), prev_vertices_.begin()), lower_itr); + auto intersection_index_first = intersection_indices_.begin() + intersection_offsets_[low_idx]; + auto intersection_index_last = + intersection_indices_.begin() + intersection_offsets_[low_idx + 1]; + auto itr = + thrust::lower_bound(thrust::seq, intersection_index_first, intersection_index_last, dst); + return (itr != intersection_index_last && *itr == dst) ? 1.0 : 1.0 / q_; + } +}; + +template +struct node2vec_sample_edges_op_t { + template + __device__ std::enable_if_t, vertex_t> operator()( + thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + thrust::nullopt_t) const + { + return dst; + } + + template + __device__ std::enable_if_t, thrust::tuple> operator()( + thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + W w) const + { + return thrust::make_tuple(dst, w); + } +}; + template struct uniform_selector { - raft::random::RngState rng_state_; - - uniform_selector(uint64_t seed) : rng_state_(seed) {} + raft::random::RngState& rng_state_; + static constexpr bool is_second_order_ = false; template std::tuple, + std::optional>, std::optional>> follow_random_edge( raft::handle_t const& handle, GraphViewType const& graph_view, std::optional> edge_weight_view, - rmm::device_uvector const& current_vertices) + rmm::device_uvector&& current_vertices, + std::optional>&& previous_vertices) { using vertex_t = typename GraphViewType::vertex_type; @@ -133,30 +238,67 @@ struct uniform_selector { minors = std::move(sample_e_op_results); } - return std::make_tuple(std::move(minors), std::move(weights)); + return std::make_tuple(std::move(minors), std::move(previous_vertices), std::move(weights)); } }; template struct biased_selector { - uint64_t seed_{0}; + raft::random::RngState& rng_state_; + static constexpr bool is_second_order_ = false; template std::tuple, + std::optional>, std::optional>> follow_random_edge( raft::handle_t const& handle, GraphViewType const& graph_view, std::optional> edge_weight_view, - rmm::device_uvector const& current_vertices) + rmm::device_uvector&& current_vertices, + std::optional>&& previous_vertices) { - // To do biased sampling, I need out_weights instead of out_degrees. - // Then I generate a random float between [0, out_weights[v]). Then - // instead of making a decision based on the index I need to find - // upper_bound (or is it lower_bound) of the random number and - // the cumulative weight. 
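A standalone sketch of that removed idea, under assumed names (`weights` holds one vertex's outgoing edge weights and `rng_state` is a raft::random::RngState; the PR itself instead routes biased selection through per_v_random_select_transform_outgoing_e, as the replacement code below shows):

// Weighted pick of one outgoing edge by inverse CDF: inclusive-scan the
// weights, draw r uniformly in [0, total), and select the first position
// whose running sum exceeds r. upper_bound is the correct choice with
// inclusive sums; lower_bound would misplace an r exactly equal to a sum.
rmm::device_uvector<weight_t> cum(weights.size(), handle.get_stream());
thrust::inclusive_scan(
  handle.get_thrust_policy(), weights.begin(), weights.end(), cum.begin());
weight_t total = cum.back_element(handle.get_stream());
rmm::device_uvector<weight_t> r(1, handle.get_stream());
cugraph::detail::uniform_random_fill(
  handle.get_stream(), r.data(), r.size(), weight_t{0}, total, rng_state);
auto selected = thrust::distance(
  cum.begin(),
  thrust::upper_bound(
    handle.get_thrust_policy(), cum.begin(), cum.end(), r.back_element(handle.get_stream())));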
- CUGRAPH_FAIL("biased sampling not implemented"); + // Create vertex frontier + using vertex_t = typename GraphViewType::vertex_type; + + using tag_t = void; + + cugraph::vertex_frontier_t vertex_frontier( + handle, 1); + + vertex_frontier.bucket(0).insert(current_vertices.begin(), current_vertices.end()); + + auto vertex_weight_sum = compute_out_weight_sums(handle, graph_view, *edge_weight_view); + edge_src_property_t edge_src_out_weight_sums(handle, graph_view); + update_edge_src_property(handle, + graph_view, + vertex_frontier.bucket(0).begin(), + vertex_frontier.bucket(0).end(), + vertex_weight_sum.data(), + edge_src_out_weight_sums.mutable_view()); + auto [sample_offsets, sample_e_op_results] = cugraph::per_v_random_select_transform_outgoing_e( + handle, + graph_view, + vertex_frontier.bucket(0), + edge_src_out_weight_sums.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + biased_random_walk_e_bias_op_t{}, + edge_src_out_weight_sums.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + biased_sample_edges_op_t{}, + rng_state_, + size_t{1}, + true, + std::make_optional( + thrust::make_tuple(vertex_t{cugraph::invalid_vertex_id::value}, weight_t{0.0}))); + + // Return results + return std::make_tuple(std::move(std::get<0>(sample_e_op_results)), + std::move(previous_vertices), + std::move(std::get<1>(sample_e_op_results))); } }; @@ -164,26 +306,232 @@ template struct node2vec_selector { weight_t p_; weight_t q_; - uint64_t seed_{0}; + raft::random::RngState& rng_state_; + static constexpr bool is_second_order_ = true; template std::tuple, + std::optional>, std::optional>> follow_random_edge( raft::handle_t const& handle, GraphViewType const& graph_view, std::optional> edge_weight_view, - rmm::device_uvector const& current_vertices) + rmm::device_uvector&& current_vertices, + std::optional>&& previous_vertices) { - // To do node2vec, I need the following: - // 1) transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v to compute the sum of the - // node2vec style weights - // 2) Generate a random number between [0, output_from_trdnioeebv[v]) - // 3) a sampling value that lets me pick the correct edge based on the same computation - // (essentially weighted sampling, but with a function that computes the weight rather - // than just using the edge weights) - CUGRAPH_FAIL("node2vec not implemented"); + // Create vertex frontier + using vertex_t = typename GraphViewType::vertex_type; + + using tag_t = vertex_t; + + // Zip previous and current vertices for nbr_intersection() + auto intersection_pairs = + thrust::make_zip_iterator(current_vertices.begin(), (*previous_vertices).begin()); + + auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + graph_view, + cugraph::edge_dummy_property_t{}.view(), + intersection_pairs, + intersection_pairs + current_vertices.size(), + std::array{true, true}, + false); + + rmm::device_uvector intersection_counts(size_t{0}, handle.get_stream()); + rmm::device_uvector aggregate_offsets(size_t{0}, handle.get_stream()); + rmm::device_uvector aggregate_currents(size_t{0}, handle.get_stream()); + rmm::device_uvector aggregate_previous(size_t{0}, handle.get_stream()); + rmm::device_uvector aggregate_indices(size_t{0}, handle.get_stream()); + + // Aggregate intersection data across minor comm + if constexpr (GraphViewType::is_multi_gpu) { + intersection_counts.resize(intersection_offsets.size(), handle.get_stream()); + thrust::adjacent_difference(handle.get_thrust_policy(), + 
intersection_offsets.begin(), + intersection_offsets.end(), + intersection_counts.begin()); + + auto recv_counts = cugraph::host_scalar_allgather( + handle.get_subcomm(cugraph::partition_manager::minor_comm_name()), + current_vertices.size(), + handle.get_stream()); + + std::vector displacements(recv_counts.size()); + std::exclusive_scan(recv_counts.begin(), recv_counts.end(), displacements.begin(), size_t{0}); + + aggregate_offsets.resize(displacements.back() + recv_counts.back() + 1, handle.get_stream()); + aggregate_offsets.set_element_to_zero_async(aggregate_offsets.size() - 1, + handle.get_stream()); + + cugraph::device_allgatherv(handle.get_subcomm(cugraph::partition_manager::minor_comm_name()), + intersection_counts.begin() + 1, + aggregate_offsets.begin(), + recv_counts, + displacements, + handle.get_stream()); + + thrust::exclusive_scan(handle.get_thrust_policy(), + aggregate_offsets.begin(), + aggregate_offsets.end(), + aggregate_offsets.begin()); + + aggregate_currents.resize(displacements.back() + recv_counts.back(), handle.get_stream()); + + cugraph::device_allgatherv(handle.get_subcomm(cugraph::partition_manager::minor_comm_name()), + current_vertices.begin(), + aggregate_currents.begin(), + recv_counts, + displacements, + handle.get_stream()); + + aggregate_previous.resize(displacements.back() + recv_counts.back(), handle.get_stream()); + + cugraph::device_allgatherv(handle.get_subcomm(cugraph::partition_manager::minor_comm_name()), + (*previous_vertices).begin(), + aggregate_previous.begin(), + recv_counts, + displacements, + handle.get_stream()); + + recv_counts = cugraph::host_scalar_allgather( + handle.get_subcomm(cugraph::partition_manager::minor_comm_name()), + intersection_offsets.back_element(handle.get_stream()), + handle.get_stream()); + + displacements.resize(recv_counts.size()); + std::exclusive_scan(recv_counts.begin(), recv_counts.end(), displacements.begin(), size_t{0}); + + aggregate_indices.resize(displacements.back() + recv_counts.back(), handle.get_stream()); + + cugraph::device_allgatherv(handle.get_subcomm(cugraph::partition_manager::minor_comm_name()), + intersection_indices.begin(), + aggregate_indices.begin(), + recv_counts, + displacements, + handle.get_stream()); + } + + cugraph::vertex_frontier_t vertex_frontier( + handle, 1); + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(current_vertices.begin(), (*previous_vertices).begin()), + thrust::make_zip_iterator(current_vertices.end(), (*previous_vertices).end())); + + // Create data structs for results + rmm::device_uvector minors(0, handle.get_stream()); + std::optional> weights{std::nullopt}; + + if (edge_weight_view) { + auto [sample_offsets, sample_e_op_results] = + cugraph::per_v_random_select_transform_outgoing_e( + handle, + graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + GraphViewType::is_multi_gpu + ? 
node2vec_random_walk_e_bias_op_t{p_, + q_, + raft::device_span( + aggregate_offsets.data(), + aggregate_offsets.size()), + raft::device_span( + aggregate_indices.data(), + aggregate_indices.size()), + raft::device_span( + aggregate_currents.data(), + aggregate_currents.size()), + raft::device_span( + aggregate_previous.data(), + aggregate_previous.size())} + : node2vec_random_walk_e_bias_op_t{p_, + q_, + raft::device_span( + intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span( + intersection_indices.data(), + intersection_indices.size()), + raft::device_span< + vertex_t const>(current_vertices.data(), + current_vertices.size()), + raft::device_span( + (*previous_vertices).data(), + (*previous_vertices).size())}, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + node2vec_sample_edges_op_t{}, + rng_state_, + size_t{1}, + true, + std::make_optional(thrust::make_tuple( + vertex_t{cugraph::invalid_vertex_id::value}, weight_t{0.0}))); + minors = std::move(std::get<0>(sample_e_op_results)); + weights = std::move(std::get<1>(sample_e_op_results)); + } else { + auto [sample_offsets, sample_e_op_results] = + cugraph::per_v_random_select_transform_outgoing_e( + handle, + graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + GraphViewType::is_multi_gpu + ? node2vec_random_walk_e_bias_op_t{p_, + q_, + raft::device_span( + aggregate_offsets.data(), + aggregate_offsets.size()), + raft::device_span( + aggregate_indices.data(), + aggregate_indices.size()), + raft::device_span( + aggregate_currents.data(), + aggregate_currents.size()), + raft::device_span( + aggregate_previous.data(), + aggregate_previous.size())} + : node2vec_random_walk_e_bias_op_t{p_, + q_, + raft::device_span( + intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span( + intersection_indices.data(), + intersection_indices.size()), + raft::device_span< + vertex_t const>(current_vertices.data(), + current_vertices.size()), + raft::device_span( + (*previous_vertices).data(), + (*previous_vertices).size())}, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + node2vec_sample_edges_op_t{}, + rng_state_, + size_t{1}, + true, + std::make_optional(vertex_t{cugraph::invalid_vertex_id::value})); + minors = std::move(sample_e_op_results); + } + + *previous_vertices = std::move(current_vertices); + + return std::make_tuple(std::move(minors), std::move(previous_vertices), std::move(weights)); } }; @@ -221,6 +569,16 @@ random_walk_impl(raft::handle_t const& handle, ? std::make_optional>(0, handle.get_stream()) : std::nullopt; + auto previous_vertices = (random_selector.is_second_order_) + ? 
@@ -221,6 +569,16 @@ random_walk_impl(raft::handle_t const& handle,
                      ? std::make_optional<rmm::device_uvector<weight_t>>(0, handle.get_stream())
                      : std::nullopt;
+  auto previous_vertices = (random_selector.is_second_order_)
+                             ? std::make_optional<rmm::device_uvector<vertex_t>>(
+                                 current_vertices.size(), handle.get_stream())
+                             : std::nullopt;
+  if (previous_vertices) {
+    raft::copy((*previous_vertices).data(),
+               start_vertices.data(),
+               start_vertices.size(),
+               handle.get_stream());
+  }
   raft::copy(
     current_vertices.data(), start_vertices.data(), start_vertices.size(), handle.get_stream());
   detail::sequence_fill(
@@ -255,25 +613,73 @@ random_walk_impl(raft::handle_t const& handle,
       auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
       auto const minor_comm_size = minor_comm.get_size();
 
-      // Shuffle vertices to correct GPU to compute random indices
-      std::forward_as_tuple(std::tie(current_vertices, current_gpu, current_position),
-                            std::ignore) =
-        cugraph::groupby_gpu_id_and_shuffle_values(
-          handle.get_comms(),
+      if (previous_vertices) {
+        std::forward_as_tuple(
+          std::tie(current_vertices, current_gpu, current_position, previous_vertices),
+          std::ignore) =
+          cugraph::groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            thrust::make_zip_iterator(current_vertices.begin(),
+                                      current_gpu.begin(),
+                                      current_position.begin(),
+                                      previous_vertices->begin()),
+            thrust::make_zip_iterator(current_vertices.end(),
+                                      current_gpu.end(),
+                                      current_position.end(),
+                                      previous_vertices->end()),
+            [key_func =
+               cugraph::detail::compute_gpu_id_from_int_vertex_t<vertex_t>{
+                 {vertex_partition_range_lasts.begin(), vertex_partition_range_lasts.size()},
+                 major_comm_size,
+                 minor_comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val)); },
+            handle.get_stream());
+      } else {
+        // Shuffle vertices to correct GPU to compute random indices
+        std::forward_as_tuple(std::tie(current_vertices, current_gpu, current_position),
+                              std::ignore) =
+          cugraph::groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            thrust::make_zip_iterator(
+              current_vertices.begin(), current_gpu.begin(), current_position.begin()),
+            thrust::make_zip_iterator(
+              current_vertices.end(), current_gpu.end(), current_position.end()),
+            [key_func =
+               cugraph::detail::compute_gpu_id_from_int_vertex_t<vertex_t>{
+                 {vertex_partition_range_lasts.begin(), vertex_partition_range_lasts.size()},
+                 major_comm_size,
+                 minor_comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val)); },
+            handle.get_stream());
+      }
+    }
+
+    // Sort for nbr_intersection, must sort all together
+    if (previous_vertices) {
+      if constexpr (multi_gpu) {
+        thrust::sort(handle.get_thrust_policy(),
+                     thrust::make_zip_iterator(current_vertices.begin(),
+                                               (*previous_vertices).begin(),
+                                               current_position.begin(),
+                                               current_gpu.begin()),
+                     thrust::make_zip_iterator(current_vertices.end(),
+                                               (*previous_vertices).end(),
+                                               current_position.end(),
+                                               current_gpu.end()));
+      } else {
+        thrust::sort(
+          handle.get_thrust_policy(),
           thrust::make_zip_iterator(
-            current_vertices.begin(), current_gpu.begin(), current_position.begin()),
+            current_vertices.begin(), (*previous_vertices).begin(), current_position.begin()),
           thrust::make_zip_iterator(
-            current_vertices.end(), current_gpu.end(), current_position.end()),
-          [key_func =
-             cugraph::detail::compute_gpu_id_from_int_vertex_t<vertex_t>{
-               {vertex_partition_range_lasts.begin(), vertex_partition_range_lasts.size()},
-               major_comm_size,
-               minor_comm_size}] __device__(auto val) { return key_func(thrust::get<0>(val)); },
-          handle.get_stream());
+            current_vertices.end(), (*previous_vertices).end(), current_position.end()));
+      }
     }
 
-    std::tie(current_vertices, new_weights) =
-      random_selector.follow_random_edge(handle, graph_view, edge_weight_view, current_vertices);
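Both shuffle branches above route each tuple by the rank that owns its current vertex so the neighbor intersection can run where the adjacency lives. A simplified sketch of the key computation follows; the real compute_gpu_id_from_int_vertex_t additionally folds the major/minor communicator interleaving into the rank it returns, which this version omits.

#include <raft/core/device_span.hpp>

#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/execution_policy.h>

// Simplified owner lookup: the owning rank is the first vertex partition whose
// exclusive upper bound exceeds v (major/minor comm interleaving omitted).
template <typename vertex_t>
struct owner_gpu_sketch_t {
  raft::device_span<vertex_t const> vertex_partition_range_lasts{};

  __device__ int operator()(vertex_t v) const
  {
    return static_cast<int>(thrust::distance(
      vertex_partition_range_lasts.begin(),
      thrust::upper_bound(thrust::seq,
                          vertex_partition_range_lasts.begin(),
                          vertex_partition_range_lasts.end(),
                          v)));
  }
};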
+      std::tie(current_vertices, previous_vertices, new_weights) =
+        random_selector.follow_random_edge(handle,
+                                           graph_view,
+                                           edge_weight_view,
+                                           std::move(current_vertices),
+                                           std::move(previous_vertices));
 
     // FIXME: remove_if has a 32-bit overflow issue
     // (https://github.com/NVIDIA/thrust/issues/1302) Seems unlikely here (the goal of
@@ -281,164 +687,244 @@ random_walk_impl(raft::handle_t const& handle,
     CUGRAPH_EXPECTS(
       current_vertices.size() < static_cast<size_t>(std::numeric_limits<int32_t>::max()),
       "remove_if will fail, current_vertices.size() is too large");
-
+    size_t compacted_length{0};
     if constexpr (multi_gpu) {
       if (result_weights) {
-        auto input_iter = thrust::make_zip_iterator(current_vertices.begin(),
-                                                    new_weights->begin(),
-                                                    current_gpu.begin(),
-                                                    current_position.begin());
-
-        auto compacted_length = thrust::distance(
-          input_iter,
-          thrust::remove_if(handle.get_thrust_policy(),
-                            input_iter,
-                            input_iter + current_vertices.size(),
-                            current_vertices.begin(),
-                            [] __device__(auto dst) {
-                              return (dst == cugraph::invalid_vertex_id<vertex_t>::value);
-                            }));
-
-        current_vertices.resize(compacted_length, handle.get_stream());
-        new_weights->resize(compacted_length, handle.get_stream());
-        current_gpu.resize(compacted_length, handle.get_stream());
-        current_position.resize(compacted_length, handle.get_stream());
-
-        // Shuffle back to original GPU
-        auto current_iter = thrust::make_zip_iterator(current_vertices.begin(),
+        if (previous_vertices) {
+          auto input_iter = thrust::make_zip_iterator(current_vertices.begin(),
+                                                      new_weights->begin(),
+                                                      current_gpu.begin(),
+                                                      current_position.begin(),
+                                                      previous_vertices->begin());
+
+          compacted_length = thrust::distance(
+            input_iter,
+            thrust::remove_if(handle.get_thrust_policy(),
+                              input_iter,
+                              input_iter + current_vertices.size(),
+                              current_vertices.begin(),
+                              [] __device__(auto dst) {
+                                return (dst == cugraph::invalid_vertex_id<vertex_t>::value);
+                              }));
+        } else {
+          auto input_iter = thrust::make_zip_iterator(current_vertices.begin(),
                                                     new_weights->begin(),
                                                     current_gpu.begin(),
                                                     current_position.begin());
 
-        std::forward_as_tuple(
-          std::tie(current_vertices, *new_weights, current_gpu, current_position), std::ignore) =
-          cugraph::groupby_gpu_id_and_shuffle_values(
-            handle.get_comms(),
-            current_iter,
-            current_iter + current_vertices.size(),
-            [] __device__(auto val) { return thrust::get<2>(val); },
-            handle.get_stream());
-
-        thrust::for_each(
-          handle.get_thrust_policy(),
-          thrust::make_zip_iterator(
-            current_vertices.begin(), new_weights->begin(), current_position.begin()),
-          thrust::make_zip_iterator(
-            current_vertices.end(), new_weights->end(), current_position.end()),
-          [result_verts = result_vertices.data(),
-           result_wgts  = result_weights->data(),
-           level,
-           max_length] __device__(auto tuple) {
-            vertex_t v = thrust::get<0>(tuple);
-            weight_t w = thrust::get<1>(tuple);
-            size_t pos = thrust::get<2>(tuple);
-            result_verts[pos * (max_length + 1) + level + 1] = v;
-            result_wgts[pos * max_length + level]            = w;
-          });
+          compacted_length = thrust::distance(
+            input_iter,
+            thrust::remove_if(handle.get_thrust_policy(),
+                              input_iter,
+                              input_iter + current_vertices.size(),
+                              current_vertices.begin(),
+                              [] __device__(auto dst) {
+                                return (dst == cugraph::invalid_vertex_id<vertex_t>::value);
+                              }));
+        }
       } else {
-        auto input_iter = thrust::make_zip_iterator(
-          current_vertices.begin(), current_gpu.begin(), current_position.begin());
-
-        auto compacted_length = thrust::distance(
          input_iter,
          thrust::remove_if(handle.get_thrust_policy(),
                            input_iter,
                            input_iter +
current_vertices.size(), - current_vertices.begin(), - [] __device__(auto dst) { - return (dst == cugraph::invalid_vertex_id::value); - })); - - current_vertices.resize(compacted_length, handle.get_stream()); - current_gpu.resize(compacted_length, handle.get_stream()); - current_position.resize(compacted_length, handle.get_stream()); - - // Shuffle back to original GPU - auto current_iter = thrust::make_zip_iterator( - current_vertices.begin(), current_gpu.begin(), current_position.begin()); - - std::forward_as_tuple(std::tie(current_vertices, current_gpu, current_position), - std::ignore) = - cugraph::groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - current_iter, - current_iter + current_vertices.size(), - [] __device__(auto val) { return thrust::get<1>(val); }, - handle.get_stream()); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_zip_iterator(current_vertices.begin(), current_position.begin()), - thrust::make_zip_iterator(current_vertices.end(), current_position.end()), - [result_verts = result_vertices.data(), level, max_length] __device__(auto tuple) { - vertex_t v = thrust::get<0>(tuple); - size_t pos = thrust::get<1>(tuple); - result_verts[pos * (max_length + 1) + level + 1] = v; - }); + if (previous_vertices) { + auto input_iter = thrust::make_zip_iterator(current_vertices.begin(), + current_gpu.begin(), + current_position.begin(), + previous_vertices->begin()); + + compacted_length = thrust::distance( + input_iter, + thrust::remove_if(handle.get_thrust_policy(), + input_iter, + input_iter + current_vertices.size(), + current_vertices.begin(), + [] __device__(auto dst) { + return (dst == cugraph::invalid_vertex_id::value); + })); + } else { + auto input_iter = thrust::make_zip_iterator( + current_vertices.begin(), current_gpu.begin(), current_position.begin()); + + compacted_length = thrust::distance( + input_iter, + thrust::remove_if(handle.get_thrust_policy(), + input_iter, + input_iter + current_vertices.size(), + current_vertices.begin(), + [] __device__(auto dst) { + return (dst == cugraph::invalid_vertex_id::value); + })); + } } } else { if (result_weights) { - auto input_iter = thrust::make_zip_iterator( - current_vertices.begin(), new_weights->begin(), current_position.begin()); - - auto compacted_length = thrust::distance( - input_iter, - thrust::remove_if(handle.get_thrust_policy(), - input_iter, - input_iter + current_vertices.size(), - current_vertices.begin(), - [] __device__(auto dst) { - return (dst == cugraph::invalid_vertex_id::value); - })); - - current_vertices.resize(compacted_length, handle.get_stream()); - new_weights->resize(compacted_length, handle.get_stream()); - current_position.resize(compacted_length, handle.get_stream()); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_zip_iterator( - current_vertices.begin(), new_weights->begin(), current_position.begin()), - thrust::make_zip_iterator( - current_vertices.end(), new_weights->end(), current_position.end()), - [result_verts = result_vertices.data(), - result_wgts = result_weights->data(), - level, - max_length] __device__(auto tuple) { - vertex_t v = thrust::get<0>(tuple); - weight_t w = thrust::get<1>(tuple); - size_t pos = thrust::get<2>(tuple); - result_verts[pos * (max_length + 1) + level + 1] = v; - result_wgts[pos * max_length + level] = w; - }); + if (previous_vertices) { + auto input_iter = thrust::make_zip_iterator(current_vertices.begin(), + new_weights->begin(), + current_position.begin(), + previous_vertices->begin()); + + compacted_length = 
thrust::distance( + input_iter, + thrust::remove_if(handle.get_thrust_policy(), + input_iter, + input_iter + current_vertices.size(), + current_vertices.begin(), + [] __device__(auto dst) { + return (dst == cugraph::invalid_vertex_id::value); + })); + } else { + auto input_iter = thrust::make_zip_iterator( + current_vertices.begin(), new_weights->begin(), current_position.begin()); + + compacted_length = thrust::distance( + input_iter, + thrust::remove_if(handle.get_thrust_policy(), + input_iter, + input_iter + current_vertices.size(), + current_vertices.begin(), + [] __device__(auto dst) { + return (dst == cugraph::invalid_vertex_id::value); + })); + } } else { - auto input_iter = - thrust::make_zip_iterator(current_vertices.begin(), current_position.begin()); - - auto compacted_length = thrust::distance( - input_iter, - thrust::remove_if(handle.get_thrust_policy(), - input_iter, - input_iter + current_vertices.size(), - current_vertices.begin(), - [] __device__(auto dst) { - return (dst == cugraph::invalid_vertex_id::value); - })); - - current_vertices.resize(compacted_length, handle.get_stream()); - current_position.resize(compacted_length, handle.get_stream()); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_zip_iterator(current_vertices.begin(), current_position.begin()), - thrust::make_zip_iterator(current_vertices.end(), current_position.end()), - [result_verts = result_vertices.data(), level, max_length] __device__(auto tuple) { - vertex_t v = thrust::get<0>(tuple); - size_t pos = thrust::get<1>(tuple); - result_verts[pos * (max_length + 1) + level + 1] = v; - }); + if (previous_vertices) { + auto input_iter = thrust::make_zip_iterator( + current_vertices.begin(), current_position.begin(), previous_vertices->begin()); + + compacted_length = thrust::distance( + input_iter, + thrust::remove_if(handle.get_thrust_policy(), + input_iter, + input_iter + current_vertices.size(), + current_vertices.begin(), + [] __device__(auto dst) { + return (dst == cugraph::invalid_vertex_id::value); + })); + } else { + auto input_iter = + thrust::make_zip_iterator(current_vertices.begin(), current_position.begin()); + + compacted_length = thrust::distance( + input_iter, + thrust::remove_if(handle.get_thrust_policy(), + input_iter, + input_iter + current_vertices.size(), + current_vertices.begin(), + [] __device__(auto dst) { + return (dst == cugraph::invalid_vertex_id::value); + })); + } + } + } + + // Moved out of if statements to cut down on code duplication + current_vertices.resize(compacted_length, handle.get_stream()); + current_vertices.shrink_to_fit(handle.get_stream()); + current_position.resize(compacted_length, handle.get_stream()); + current_position.shrink_to_fit(handle.get_stream()); + if (result_weights) { + new_weights->resize(compacted_length, handle.get_stream()); + new_weights->shrink_to_fit(handle.get_stream()); + } + if (previous_vertices) { + previous_vertices->resize(compacted_length, handle.get_stream()); + previous_vertices->shrink_to_fit(handle.get_stream()); + } + if constexpr (multi_gpu) { + current_gpu.resize(compacted_length, handle.get_stream()); + current_gpu.shrink_to_fit(handle.get_stream()); + + // Shuffle back to original GPU + if (previous_vertices) { + if (result_weights) { + auto current_iter = thrust::make_zip_iterator(current_vertices.begin(), + new_weights->begin(), + current_gpu.begin(), + current_position.begin(), + previous_vertices->begin()); + + std::forward_as_tuple( + std::tie( + current_vertices, *new_weights, current_gpu, 
current_position, *previous_vertices),
+          std::ignore) =
+          cugraph::groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            current_iter,
+            current_iter + current_vertices.size(),
+            [] __device__(auto val) { return thrust::get<2>(val); },
+            handle.get_stream());
+      } else {
+        auto current_iter = thrust::make_zip_iterator(current_vertices.begin(),
+                                                      current_gpu.begin(),
+                                                      current_position.begin(),
+                                                      previous_vertices->begin());
+
+        std::forward_as_tuple(
+          std::tie(current_vertices, current_gpu, current_position, *previous_vertices),
+          std::ignore) =
+          cugraph::groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            current_iter,
+            current_iter + current_vertices.size(),
+            [] __device__(auto val) { return thrust::get<1>(val); },
+            handle.get_stream());
+      }
+    } else {
+      if (result_weights) {
+        auto current_iter = thrust::make_zip_iterator(current_vertices.begin(),
+                                                      new_weights->begin(),
+                                                      current_gpu.begin(),
+                                                      current_position.begin());
+
+        std::forward_as_tuple(
+          std::tie(current_vertices, *new_weights, current_gpu, current_position), std::ignore) =
+          cugraph::groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            current_iter,
+            current_iter + current_vertices.size(),
+            [] __device__(auto val) { return thrust::get<2>(val); },
+            handle.get_stream());
+      } else {
+        auto current_iter = thrust::make_zip_iterator(
+          current_vertices.begin(), current_gpu.begin(), current_position.begin());
+
+        std::forward_as_tuple(std::tie(current_vertices, current_gpu, current_position),
+                              std::ignore) =
+          cugraph::groupby_gpu_id_and_shuffle_values(
+            handle.get_comms(),
+            current_iter,
+            current_iter + current_vertices.size(),
+            [] __device__(auto val) { return thrust::get<1>(val); },
+            handle.get_stream());
+      }
+    }
+  }
+
+  if (result_weights) {
+    thrust::for_each(handle.get_thrust_policy(),
+                     thrust::make_zip_iterator(
+                       current_vertices.begin(), new_weights->begin(), current_position.begin()),
+                     thrust::make_zip_iterator(
+                       current_vertices.end(), new_weights->end(), current_position.end()),
+                     [result_verts = result_vertices.data(),
+                      result_wgts  = result_weights->data(),
+                      level,
+                      max_length] __device__(auto tuple) {
+                       vertex_t v = thrust::get<0>(tuple);
+                       weight_t w = thrust::get<1>(tuple);
+                       size_t pos = thrust::get<2>(tuple);
+                       result_verts[pos * (max_length + 1) + level + 1] = v;
+                       result_wgts[pos * max_length + level]            = w;
+                     });
+  } else {
+    thrust::for_each(
+      handle.get_thrust_policy(),
+      thrust::make_zip_iterator(current_vertices.begin(), current_position.begin()),
+      thrust::make_zip_iterator(current_vertices.end(), current_position.end()),
+      [result_verts = result_vertices.data(), level, max_length] __device__(auto tuple) {
+        vertex_t v = thrust::get<0>(tuple);
+        size_t pos = thrust::get<1>(tuple);
+        result_verts[pos * (max_length + 1) + level + 1] = v;
+      });
+  }
 }
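The two thrust::for_each calls above scatter each surviving walker's new hop directly into flat, seed-major result buffers: the vertex matrix reserves max_length + 1 slots per seed (slot 0 is the seed itself), the weight matrix max_length slots. The same indexing, as a hedged host-side sketch with hypothetical names:

#include <cstddef>
#include <vector>

// Mirrors the device-side writes to result_verts / result_wgts (illustrative only).
template <typename vertex_t, typename weight_t>
void write_hop(std::vector<vertex_t>& verts,  // num_seeds * (max_length + 1) entries
               std::vector<weight_t>& wgts,   // num_seeds * max_length entries
               size_t max_length,
               size_t pos,    // walker's original seed position
               size_t level,  // hop being recorded
               vertex_t v,
               weight_t w)
{
  verts[pos * (max_length + 1) + level + 1] = v;  // slot 0 already holds the seed vertex
  wgts[pos * max_length + level]            = w;
}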
  return std::make_tuple(std::move(result_vertices), std::move(result_weights));
@@ -449,11 +935,11 @@ random_walk_impl(raft::handle_t const& handle,
 template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
 std::tuple<rmm::device_uvector<vertex_t>, std::optional<rmm::device_uvector<weight_t>>>
 uniform_random_walks(raft::handle_t const& handle,
+                     raft::random::RngState& rng_state,
                      graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
                      std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
                      raft::device_span<vertex_t const> start_vertices,
-                     size_t max_length,
-                     uint64_t seed)
+                     size_t max_length)
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
@@ -462,18 +948,17 @@ uniform_random_walks(raft::handle_t const& handle,
     edge_weight_view,
     start_vertices,
     max_length,
-    detail::uniform_selector(
-      (seed == 0 ? detail::get_current_time_nanoseconds() : seed)));
+    detail::uniform_selector{rng_state});
 }
 
 template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
 std::tuple<rmm::device_uvector<vertex_t>, std::optional<rmm::device_uvector<weight_t>>>
 biased_random_walks(raft::handle_t const& handle,
+                    raft::random::RngState& rng_state,
                     graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
                     edge_property_view_t<edge_t, weight_t const*> edge_weight_view,
                     raft::device_span<vertex_t const> start_vertices,
-                    size_t max_length,
-                    uint64_t seed)
+                    size_t max_length)
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
@@ -483,30 +968,28 @@ biased_random_walks(raft::handle_t const& handle,
     std::optional<edge_property_view_t<edge_t, weight_t const*>>{edge_weight_view},
     start_vertices,
     max_length,
-    detail::biased_selector{(seed == 0 ? detail::get_current_time_nanoseconds() : seed)});
+    detail::biased_selector{rng_state});
 }
 
 template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
 std::tuple<rmm::device_uvector<vertex_t>, std::optional<rmm::device_uvector<weight_t>>>
 node2vec_random_walks(raft::handle_t const& handle,
+                      raft::random::RngState& rng_state,
                       graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
                       std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
                       raft::device_span<vertex_t const> start_vertices,
                       size_t max_length,
                       weight_t p,
-                      weight_t q,
-                      uint64_t seed)
+                      weight_t q)
 {
   CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
 
-  return detail::random_walk_impl(
-    handle,
-    graph_view,
-    edge_weight_view,
-    start_vertices,
-    max_length,
-    detail::node2vec_selector{
-      p, q, (seed == 0 ? detail::get_current_time_nanoseconds() : seed)});
+  return detail::random_walk_impl(handle,
+                                  graph_view,
+                                  edge_weight_view,
+                                  start_vertices,
+                                  max_length,
+                                  detail::node2vec_selector{p, q, rng_state});
 }
 
 } // namespace cugraph
diff --git a/cpp/src/sampling/random_walks_mg_v32_e32.cu b/cpp/src/sampling/random_walks_mg_v32_e32.cu
index 421d3e9c818..abe5386da1c 100644
--- a/cpp/src/sampling/random_walks_mg_v32_e32.cu
+++ b/cpp/src/sampling/random_walks_mg_v32_e32.cu
@@ -22,54 +22,54 @@ namespace cugraph {
 
 template std::tuple<rmm::device_uvector<int32_t>, std::optional<rmm::device_uvector<float>>>
 uniform_random_walks(raft::handle_t const& handle,
+                     raft::random::RngState& rng_state,
                      graph_view_t<int32_t, int32_t, false, true> const& graph_view,
                      std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
                      raft::device_span<int32_t const> start_vertices,
-                     size_t max_length,
-                     uint64_t seed);
+                     size_t max_length);
 
 template std::tuple<rmm::device_uvector<int32_t>, std::optional<rmm::device_uvector<double>>>
 uniform_random_walks(raft::handle_t const& handle,
+                     raft::random::RngState& rng_state,
                      graph_view_t<int32_t, int32_t, false, true> const& graph_view,
                      std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
                      raft::device_span<int32_t const> start_vertices,
-                     size_t max_length,
-                     uint64_t seed);
+                     size_t max_length);
 
 template std::tuple<rmm::device_uvector<int32_t>, std::optional<rmm::device_uvector<float>>>
 biased_random_walks(raft::handle_t const& handle,
+                    raft::random::RngState& rng_state,
                     graph_view_t<int32_t, int32_t, false, true> const& graph_view,
                     edge_property_view_t<int32_t, float const*> edge_weight_view,
                     raft::device_span<int32_t const> start_vertices,
-                    size_t max_length,
-                    uint64_t seed);
+                    size_t max_length);
 
 template std::tuple<rmm::device_uvector<int32_t>, std::optional<rmm::device_uvector<double>>>
 biased_random_walks(raft::handle_t const& handle,
+                    raft::random::RngState& rng_state,
                     graph_view_t<int32_t, int32_t, false, true> const& graph_view,
                     edge_property_view_t<int32_t, double const*> edge_weight_view,
                     raft::device_span<int32_t const> start_vertices,
-                    size_t max_length,
-                    uint64_t seed);
+                    size_t max_length);
 
 template std::tuple<rmm::device_uvector<int32_t>, std::optional<rmm::device_uvector<float>>>
 node2vec_random_walks(raft::handle_t const& handle,
+                      raft::random::RngState& rng_state,
                       graph_view_t<int32_t, int32_t, false, true> const& graph_view,
                       std::optional<edge_property_view_t<int32_t, float const*>> edge_weight_view,
                       raft::device_span<int32_t const> start_vertices,
                       size_t max_length,
                       float p,
-                      float q,
-                      uint64_t seed);
+                      float q);
 
 template std::tuple<rmm::device_uvector<int32_t>, std::optional<rmm::device_uvector<double>>>
 node2vec_random_walks(raft::handle_t const& handle,
+                      raft::random::RngState& rng_state,
                       graph_view_t<int32_t, int32_t, false, true> const& graph_view,
                       std::optional<edge_property_view_t<int32_t, double const*>> edge_weight_view,
                       raft::device_span<int32_t const> start_vertices,
                       size_t max_length,
                       double p,
-                      double q,
-                      uint64_t seed);
+                      double q);
 
 } // namespace cugraph
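With this change callers own the random state explicitly; the old "seed == 0 means take the current time" fallback disappears along with the trailing seed parameter. A hedged caller-side sketch of the updated entry point (handle, graph_view, edge_weight_view, and d_starts are assumed to exist from earlier setup; the variable names are hypothetical):

#include <cugraph/algorithms.hpp>
#include <raft/random/rng_state.hpp>

// Seed the generator once, up front; the walk consumes and advances rng_state.
raft::random::RngState rng_state(42);
auto [vertex_paths, weight_paths] = cugraph::uniform_random_walks(
  handle,
  rng_state,  // replaces the removed uint64_t seed parameter
  graph_view,
  edge_weight_view,
  raft::device_span<int32_t const>(d_starts.data(), d_starts.size()),
  size_t{5} /* max_length */);

diff --git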
a/cpp/src/sampling/random_walks_mg_v32_e64.cu b/cpp/src/sampling/random_walks_mg_v32_e64.cu index d38af65a505..b1bf1a19b77 100644 --- a/cpp/src/sampling/random_walks_mg_v32_e64.cu +++ b/cpp/src/sampling/random_walks_mg_v32_e64.cu @@ -22,54 +22,54 @@ namespace cugraph { template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, float p, - float q, - uint64_t seed); + float q); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, double p, - double q, - uint64_t seed); + double q); } // namespace cugraph diff --git a/cpp/src/sampling/random_walks_mg_v64_e64.cu b/cpp/src/sampling/random_walks_mg_v64_e64.cu index 9dedc893242..13cc899e50d 100644 --- a/cpp/src/sampling/random_walks_mg_v64_e64.cu +++ b/cpp/src/sampling/random_walks_mg_v64_e64.cu @@ -22,54 +22,54 @@ namespace cugraph { template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, 
+ raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, float p, - float q, - uint64_t seed); + float q); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, double p, - double q, - uint64_t seed); + double q); } // namespace cugraph diff --git a/cpp/src/sampling/random_walks_sg_v32_e32.cu b/cpp/src/sampling/random_walks_sg_v32_e32.cu index 7b64d107250..383917c0248 100644 --- a/cpp/src/sampling/random_walks_sg_v32_e32.cu +++ b/cpp/src/sampling/random_walks_sg_v32_e32.cu @@ -22,54 +22,54 @@ namespace cugraph { template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, float p, - float q, - uint64_t seed); + float q); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, double p, - double q, - uint64_t seed); + double q); } // namespace cugraph diff --git a/cpp/src/sampling/random_walks_sg_v32_e64.cu b/cpp/src/sampling/random_walks_sg_v32_e64.cu index d9ea09f36ef..98d2bb02d88 100644 --- a/cpp/src/sampling/random_walks_sg_v32_e64.cu +++ b/cpp/src/sampling/random_walks_sg_v32_e64.cu @@ -22,54 +22,54 @@ namespace cugraph { template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, 
graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, float p, - float q, - uint64_t seed); + float q); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, double p, - double q, - uint64_t seed); + double q); } // namespace cugraph diff --git a/cpp/src/sampling/random_walks_sg_v64_e64.cu b/cpp/src/sampling/random_walks_sg_v64_e64.cu index 0b9be107276..c139acec4b7 100644 --- a/cpp/src/sampling/random_walks_sg_v64_e64.cu +++ b/cpp/src/sampling/random_walks_sg_v64_e64.cu @@ -22,54 +22,54 @@ namespace cugraph { template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> uniform_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> biased_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, edge_property_view_t edge_weight_view, raft::device_span start_vertices, - size_t max_length, - uint64_t seed); + size_t max_length); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, float p, - float q, - uint64_t seed); + float q); template std::tuple, std::optional>> node2vec_random_walks(raft::handle_t const& handle, + raft::random::RngState& rng_state, graph_view_t const& graph_view, std::optional> edge_weight_view, raft::device_span start_vertices, size_t max_length, double p, - double q, - uint64_t seed); + double q); } // namespace cugraph diff --git a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh deleted file mode 100644 index f5bc3ef6d2e..00000000000 --- a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh +++ /dev/null @@ -1,719 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "prims/kv_store.cuh" - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -// FIXME: deprecated, to be deleted -namespace cugraph { - -namespace { - -// output sorted by (primary key:label_index, secondary key:vertex) -template -std::tuple> /* label indices */, - rmm::device_uvector /* vertices */, - std::optional> /* minimum hops for the vertices */, - std::optional> /* label offsets for the output */> -compute_min_hop_for_unique_label_vertex_pairs( - raft::handle_t const& handle, - raft::device_span vertices, - std::optional> hops, - std::optional> label_indices, - std::optional> label_offsets) -{ - auto approx_edges_to_sort_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * - (1 << 20) /* tuning parameter */; // for segmented sort - - if (label_indices) { - auto num_labels = (*label_offsets).size() - 1; - - rmm::device_uvector tmp_label_indices((*label_indices).size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - (*label_indices).begin(), - (*label_indices).end(), - tmp_label_indices.begin()); - - rmm::device_uvector tmp_vertices(0, handle.get_stream()); - std::optional> tmp_hops{std::nullopt}; - - if (hops) { - tmp_vertices.resize(vertices.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), vertices.begin(), vertices.end(), tmp_vertices.begin()); - tmp_hops = rmm::device_uvector((*hops).size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), (*hops).begin(), (*hops).end(), (*tmp_hops).begin()); - - auto triplet_first = thrust::make_zip_iterator( - tmp_label_indices.begin(), tmp_vertices.begin(), (*tmp_hops).begin()); - thrust::sort( - handle.get_thrust_policy(), triplet_first, triplet_first + tmp_label_indices.size()); - auto key_first = thrust::make_zip_iterator(tmp_label_indices.begin(), tmp_vertices.begin()); - auto num_uniques = static_cast( - thrust::distance(key_first, - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - key_first, - key_first + tmp_label_indices.size(), - (*tmp_hops).begin())))); - tmp_label_indices.resize(num_uniques, handle.get_stream()); - tmp_vertices.resize(num_uniques, handle.get_stream()); - (*tmp_hops).resize(num_uniques, handle.get_stream()); - tmp_label_indices.shrink_to_fit(handle.get_stream()); - tmp_vertices.shrink_to_fit(handle.get_stream()); - (*tmp_hops).shrink_to_fit(handle.get_stream()); - } else { - rmm::device_uvector segment_sorted_vertices(vertices.size(), handle.get_stream()); - - rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( - handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration); - auto num_chunks = h_label_offsets.size() - 1; - - for (size_t i = 0; i < 
num_chunks; ++i) { - size_t tmp_storage_bytes{0}; - - auto offset_first = - thrust::make_transform_iterator((*label_offsets).data() + h_label_offsets[i], - detail::shift_left_t{h_edge_offsets[i]}); - cub::DeviceSegmentedSort::SortKeys(static_cast(nullptr), - tmp_storage_bytes, - vertices.begin() + h_edge_offsets[i], - segment_sorted_vertices.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - - if (tmp_storage_bytes > d_tmp_storage.size()) { - d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); - } - - cub::DeviceSegmentedSort::SortKeys(d_tmp_storage.data(), - tmp_storage_bytes, - vertices.begin() + h_edge_offsets[i], - segment_sorted_vertices.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - } - d_tmp_storage.resize(0, handle.get_stream()); - d_tmp_storage.shrink_to_fit(handle.get_stream()); - - auto pair_first = - thrust::make_zip_iterator(tmp_label_indices.begin(), segment_sorted_vertices.begin()); - auto num_uniques = static_cast(thrust::distance( - pair_first, - thrust::unique( - handle.get_thrust_policy(), pair_first, pair_first + tmp_label_indices.size()))); - tmp_label_indices.resize(num_uniques, handle.get_stream()); - segment_sorted_vertices.resize(num_uniques, handle.get_stream()); - tmp_label_indices.shrink_to_fit(handle.get_stream()); - segment_sorted_vertices.shrink_to_fit(handle.get_stream()); - - tmp_vertices = std::move(segment_sorted_vertices); - } - - rmm::device_uvector tmp_label_offsets(num_labels + 1, handle.get_stream()); - tmp_label_offsets.set_element_to_zero_async(0, handle.get_stream()); - thrust::upper_bound(handle.get_thrust_policy(), - tmp_label_indices.begin(), - tmp_label_indices.end(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(num_labels), - tmp_label_offsets.begin() + 1); - - return std::make_tuple(std::move(tmp_label_indices), - std::move(tmp_vertices), - std::move(tmp_hops), - std::move(tmp_label_offsets)); - } else { - rmm::device_uvector tmp_vertices(vertices.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), vertices.begin(), vertices.end(), tmp_vertices.begin()); - - if (hops) { - rmm::device_uvector tmp_hops((*hops).size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), (*hops).begin(), (*hops).end(), tmp_hops.begin()); - - auto pair_first = thrust::make_zip_iterator( - tmp_vertices.begin(), tmp_hops.begin()); // vertex is a primary key, hop is a secondary key - thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + tmp_vertices.size()); - tmp_vertices.resize( - thrust::distance(tmp_vertices.begin(), - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - tmp_vertices.begin(), - tmp_vertices.end(), - tmp_hops.begin()))), - handle.get_stream()); - tmp_hops.resize(tmp_vertices.size(), handle.get_stream()); - - return std::make_tuple( - std::nullopt, std::move(tmp_vertices), std::move(tmp_hops), std::nullopt); - } else { - thrust::sort(handle.get_thrust_policy(), tmp_vertices.begin(), tmp_vertices.end()); - tmp_vertices.resize( - thrust::distance( - tmp_vertices.begin(), - thrust::unique(handle.get_thrust_policy(), tmp_vertices.begin(), tmp_vertices.end())), - handle.get_stream()); - tmp_vertices.shrink_to_fit(handle.get_stream()); - - return 
std::make_tuple(std::nullopt, std::move(tmp_vertices), std::nullopt, std::nullopt); - } - } -} - -template -std::tuple, std::optional>> -compute_renumber_map(raft::handle_t const& handle, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts, - std::optional> edgelist_hops, - std::optional> label_offsets) -{ - auto approx_edges_to_sort_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * - (1 << 20) /* tuning parameter */; // for segmented sort - - std::optional> edgelist_label_indices{std::nullopt}; - if (label_offsets) { - edgelist_label_indices = - detail::expand_sparse_offsets(*label_offsets, label_index_t{0}, handle.get_stream()); - } - - auto [unique_label_src_pair_label_indices, - unique_label_src_pair_vertices, - unique_label_src_pair_hops, - unique_label_src_pair_label_offsets] = - compute_min_hop_for_unique_label_vertex_pairs( - handle, - edgelist_srcs, - edgelist_hops, - edgelist_label_indices ? std::make_optional>( - (*edgelist_label_indices).data(), (*edgelist_label_indices).size()) - : std::nullopt, - label_offsets); - - auto [unique_label_dst_pair_label_indices, - unique_label_dst_pair_vertices, - unique_label_dst_pair_hops, - unique_label_dst_pair_label_offsets] = - compute_min_hop_for_unique_label_vertex_pairs( - handle, - edgelist_dsts, - edgelist_hops, - edgelist_label_indices ? std::make_optional>( - (*edgelist_label_indices).data(), (*edgelist_label_indices).size()) - : std::nullopt, - label_offsets); - - edgelist_label_indices = std::nullopt; - - if (label_offsets) { - auto num_labels = (*label_offsets).size() - 1; - - rmm::device_uvector renumber_map(0, handle.get_stream()); - rmm::device_uvector renumber_map_label_indices(0, handle.get_stream()); - - renumber_map.reserve( - (*unique_label_src_pair_label_indices).size() + (*unique_label_dst_pair_label_indices).size(), - handle.get_stream()); - renumber_map_label_indices.reserve(renumber_map.capacity(), handle.get_stream()); - - auto num_chunks = (edgelist_srcs.size() + (approx_edges_to_sort_per_iteration - 1)) / - approx_edges_to_sort_per_iteration; - auto chunk_size = (num_chunks > 0) ? 
((num_labels + (num_chunks - 1)) / num_chunks) : 0; - - size_t copy_offset{0}; - for (size_t i = 0; i < num_chunks; ++i) { - auto src_start_offset = - (*unique_label_src_pair_label_offsets).element(chunk_size * i, handle.get_stream()); - auto src_end_offset = - (*unique_label_src_pair_label_offsets) - .element(std::min(chunk_size * (i + 1), num_labels), handle.get_stream()); - auto dst_start_offset = - (*unique_label_dst_pair_label_offsets).element(chunk_size * i, handle.get_stream()); - auto dst_end_offset = - (*unique_label_dst_pair_label_offsets) - .element(std::min(chunk_size * (i + 1), num_labels), handle.get_stream()); - - rmm::device_uvector merged_label_indices( - (src_end_offset - src_start_offset) + (dst_end_offset - dst_start_offset), - handle.get_stream()); - rmm::device_uvector merged_vertices(merged_label_indices.size(), - handle.get_stream()); - rmm::device_uvector merged_flags(merged_label_indices.size(), handle.get_stream()); - - if (edgelist_hops) { - rmm::device_uvector merged_hops(merged_label_indices.size(), handle.get_stream()); - auto src_quad_first = - thrust::make_zip_iterator((*unique_label_src_pair_label_indices).begin(), - unique_label_src_pair_vertices.begin(), - (*unique_label_src_pair_hops).begin(), - thrust::make_constant_iterator(int8_t{0})); - auto dst_quad_first = - thrust::make_zip_iterator((*unique_label_dst_pair_label_indices).begin(), - unique_label_dst_pair_vertices.begin(), - (*unique_label_dst_pair_hops).begin(), - thrust::make_constant_iterator(int8_t{1})); - thrust::merge(handle.get_thrust_policy(), - src_quad_first + src_start_offset, - src_quad_first + src_end_offset, - dst_quad_first + dst_start_offset, - dst_quad_first + dst_end_offset, - thrust::make_zip_iterator(merged_label_indices.begin(), - merged_vertices.begin(), - merged_hops.begin(), - merged_flags.begin())); - - auto unique_key_first = - thrust::make_zip_iterator(merged_label_indices.begin(), merged_vertices.begin()); - merged_label_indices.resize( - thrust::distance( - unique_key_first, - thrust::get<0>(thrust::unique_by_key( - handle.get_thrust_policy(), - unique_key_first, - unique_key_first + merged_label_indices.size(), - thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), - handle.get_stream()); - merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); - merged_hops.resize(merged_label_indices.size(), handle.get_stream()); - merged_flags.resize(merged_label_indices.size(), handle.get_stream()); - auto sort_key_first = thrust::make_zip_iterator( - merged_label_indices.begin(), merged_hops.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_label_indices.size(), - merged_vertices.begin()); - } else { - auto src_triplet_first = - thrust::make_zip_iterator((*unique_label_src_pair_label_indices).begin(), - unique_label_src_pair_vertices.begin(), - thrust::make_constant_iterator(int8_t{0})); - auto dst_triplet_first = - thrust::make_zip_iterator((*unique_label_dst_pair_label_indices).begin(), - unique_label_dst_pair_vertices.begin(), - thrust::make_constant_iterator(int8_t{1})); - thrust::merge( - handle.get_thrust_policy(), - src_triplet_first + src_start_offset, - src_triplet_first + src_end_offset, - dst_triplet_first + dst_start_offset, - dst_triplet_first + dst_end_offset, - thrust::make_zip_iterator( - merged_label_indices.begin(), merged_vertices.begin(), merged_flags.begin())); - - auto unique_key_first = - thrust::make_zip_iterator(merged_label_indices.begin(), 
merged_vertices.begin()); - merged_label_indices.resize( - thrust::distance( - unique_key_first, - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - unique_key_first, - unique_key_first + merged_label_indices.size(), - merged_flags.begin()))), - handle.get_stream()); - merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); - merged_flags.resize(merged_label_indices.size(), handle.get_stream()); - auto sort_key_first = - thrust::make_zip_iterator(merged_label_indices.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_label_indices.size(), - merged_vertices.begin()); - } - - renumber_map.resize(copy_offset + merged_vertices.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - merged_vertices.begin(), - merged_vertices.end(), - renumber_map.begin() + copy_offset); - renumber_map_label_indices.resize(copy_offset + merged_label_indices.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - merged_label_indices.begin(), - merged_label_indices.end(), - renumber_map_label_indices.begin() + copy_offset); - - copy_offset += merged_vertices.size(); - } - - renumber_map.shrink_to_fit(handle.get_stream()); - renumber_map_label_indices.shrink_to_fit(handle.get_stream()); - - return std::make_tuple(std::move(renumber_map), std::move(renumber_map_label_indices)); - } else { - if (edgelist_hops) { - rmm::device_uvector merged_vertices( - unique_label_src_pair_vertices.size() + unique_label_dst_pair_vertices.size(), - handle.get_stream()); - rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); - rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); - auto src_triplet_first = thrust::make_zip_iterator(unique_label_src_pair_vertices.begin(), - (*unique_label_src_pair_hops).begin(), - thrust::make_constant_iterator(int8_t{0})); - auto dst_triplet_first = thrust::make_zip_iterator(unique_label_dst_pair_vertices.begin(), - (*unique_label_dst_pair_hops).begin(), - thrust::make_constant_iterator(int8_t{1})); - thrust::merge(handle.get_thrust_policy(), - src_triplet_first, - src_triplet_first + unique_label_src_pair_vertices.size(), - dst_triplet_first, - dst_triplet_first + unique_label_dst_pair_vertices.size(), - thrust::make_zip_iterator( - merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); - - unique_label_src_pair_vertices.resize(0, handle.get_stream()); - unique_label_src_pair_vertices.shrink_to_fit(handle.get_stream()); - unique_label_src_pair_hops = std::nullopt; - unique_label_dst_pair_vertices.resize(0, handle.get_stream()); - unique_label_dst_pair_vertices.shrink_to_fit(handle.get_stream()); - unique_label_dst_pair_hops = std::nullopt; - - merged_vertices.resize( - thrust::distance(merged_vertices.begin(), - thrust::get<0>(thrust::unique_by_key( - handle.get_thrust_policy(), - merged_vertices.begin(), - merged_vertices.end(), - thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), - handle.get_stream()); - merged_hops.resize(merged_vertices.size(), handle.get_stream()); - merged_flags.resize(merged_vertices.size(), handle.get_stream()); - - auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - merged_vertices.begin()); - - return std::make_tuple(std::move(merged_vertices), std::nullopt); - } else { - rmm::device_uvector 
output_vertices(unique_label_dst_pair_vertices.size(), - handle.get_stream()); - auto output_last = thrust::set_difference(handle.get_thrust_policy(), - unique_label_dst_pair_vertices.begin(), - unique_label_dst_pair_vertices.end(), - unique_label_src_pair_vertices.begin(), - unique_label_src_pair_vertices.end(), - output_vertices.begin()); - - auto num_unique_srcs = unique_label_src_pair_vertices.size(); - auto renumber_map = std::move(unique_label_src_pair_vertices); - renumber_map.resize( - renumber_map.size() + thrust::distance(output_vertices.begin(), output_last), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - output_vertices.begin(), - output_last, - renumber_map.begin() + num_unique_srcs); - - return std::make_tuple(std::move(renumber_map), std::nullopt); - } - } -} - -} // namespace - -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - std::optional>> -renumber_sampled_edgelist( - raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional> edgelist_hops, - std::optional, raft::device_span>> - label_offsets, - bool do_expensive_check) -{ - using label_index_t = uint32_t; - - // 1. check input arguments - - CUGRAPH_EXPECTS(!label_offsets || (std::get<0>(*label_offsets).size() <= - std::numeric_limits::max()), - "Invalid input arguments: current implementation assumes that the number of " - "unique labels is no larger than std::numeric_limits::max()."); - - CUGRAPH_EXPECTS( - edgelist_srcs.size() == edgelist_dsts.size(), - "Invalid input arguments: edgelist_srcs.size() and edgelist_dsts.size() should coincide."); - CUGRAPH_EXPECTS(!edgelist_hops.has_value() || (edgelist_srcs.size() == (*edgelist_hops).size()), - "Invalid input arguments: if edgelist_hops is valid, (*edgelist_hops).size() and " - "edgelist_srcs.size() should coincide."); - CUGRAPH_EXPECTS(!label_offsets.has_value() || - (std::get<1>(*label_offsets).size() == std::get<0>(*label_offsets).size() + 1), - "Invalid input arguments: if label_offsets is valid, " - "std::get<1>(label_offsets).size() (size of the offset array) should be " - "std::get<0>(label_offsets).size() (number of unique labels) + 1."); - - if (do_expensive_check) { - if (label_offsets) { - CUGRAPH_EXPECTS(thrust::is_sorted(handle.get_thrust_policy(), - std::get<1>(*label_offsets).begin(), - std::get<1>(*label_offsets).end()), - "Invalid input arguments: if label_offsets is valid, " - "std::get<1>(*label_offsets) should be sorted."); - size_t back_element{}; - raft::update_host( - &back_element, - std::get<1>(*label_offsets).data() + (std::get<1>(*label_offsets).size() - 1), - size_t{1}, - handle.get_stream()); - handle.get_stream(); - CUGRAPH_EXPECTS(back_element == edgelist_srcs.size(), - "Invalid input arguments: if label_offsets is valid, the last element of " - "std::get<1>(*label_offsets) and edgelist_srcs.size() should coincide."); - } - } - - // 2. compute renumber_map - - auto [renumber_map, renumber_map_label_indices] = compute_renumber_map( - handle, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - edgelist_hops, - label_offsets ? std::make_optional>(std::get<1>(*label_offsets)) - : std::nullopt); - - // 3. 
compute renumber map offsets for each label - - std::optional> renumber_map_label_offsets{}; - if (label_offsets) { - auto num_unique_labels = thrust::count_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator((*renumber_map_label_indices).size()), - detail::is_first_in_run_t{(*renumber_map_label_indices).data()}); - rmm::device_uvector unique_label_indices(num_unique_labels, handle.get_stream()); - rmm::device_uvector vertex_counts(num_unique_labels, handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - (*renumber_map_label_indices).begin(), - (*renumber_map_label_indices).end(), - thrust::make_constant_iterator(size_t{1}), - unique_label_indices.begin(), - vertex_counts.begin()); - - renumber_map_label_offsets = - rmm::device_uvector(std::get<0>(*label_offsets).size() + 1, handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - (*renumber_map_label_offsets).begin(), - (*renumber_map_label_offsets).end(), - size_t{0}); - thrust::scatter(handle.get_thrust_policy(), - vertex_counts.begin(), - vertex_counts.end(), - unique_label_indices.begin(), - (*renumber_map_label_offsets).begin() + 1); - - thrust::inclusive_scan(handle.get_thrust_policy(), - (*renumber_map_label_offsets).begin(), - (*renumber_map_label_offsets).end(), - (*renumber_map_label_offsets).begin()); - } - - // 4. renumber input edges - - if (label_offsets) { - rmm::device_uvector new_vertices(renumber_map.size(), handle.get_stream()); - thrust::tabulate(handle.get_thrust_policy(), - new_vertices.begin(), - new_vertices.end(), - [label_indices = raft::device_span( - (*renumber_map_label_indices).data(), (*renumber_map_label_indices).size()), - renumber_map_label_offsets = raft::device_span( - (*renumber_map_label_offsets).data(), - (*renumber_map_label_offsets).size())] __device__(size_t i) { - auto label_index = label_indices[i]; - auto label_start_offset = renumber_map_label_offsets[label_index]; - return static_cast(i - label_start_offset); - }); - - (*renumber_map_label_indices).resize(0, handle.get_stream()); - (*renumber_map_label_indices).shrink_to_fit(handle.get_stream()); - - auto num_labels = std::get<0>(*label_offsets).size(); - - rmm::device_uvector segment_sorted_renumber_map(renumber_map.size(), - handle.get_stream()); - rmm::device_uvector segment_sorted_new_vertices(new_vertices.size(), - handle.get_stream()); - - rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - - auto approx_edges_to_sort_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * - (1 << 20) /* tuning parameter */; // for segmented sort - - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( - handle, - raft::device_span{(*renumber_map_label_offsets).data(), - (*renumber_map_label_offsets).size()}, - renumber_map.size(), - approx_edges_to_sort_per_iteration); - auto num_chunks = h_label_offsets.size() - 1; - - for (size_t i = 0; i < num_chunks; ++i) { - size_t tmp_storage_bytes{0}; - - auto offset_first = - thrust::make_transform_iterator((*renumber_map_label_offsets).data() + h_label_offsets[i], - detail::shift_left_t{h_edge_offsets[i]}); - cub::DeviceSegmentedSort::SortPairs(static_cast(nullptr), - tmp_storage_bytes, - renumber_map.begin() + h_edge_offsets[i], - segment_sorted_renumber_map.begin() + h_edge_offsets[i], - new_vertices.begin() + h_edge_offsets[i], - segment_sorted_new_vertices.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - 
h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - - if (tmp_storage_bytes > d_tmp_storage.size()) { - d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); - } - - cub::DeviceSegmentedSort::SortPairs(d_tmp_storage.data(), - tmp_storage_bytes, - renumber_map.begin() + h_edge_offsets[i], - segment_sorted_renumber_map.begin() + h_edge_offsets[i], - new_vertices.begin() + h_edge_offsets[i], - segment_sorted_new_vertices.begin() + h_edge_offsets[i], - h_edge_offsets[i + 1] - h_edge_offsets[i], - h_label_offsets[i + 1] - h_label_offsets[i], - offset_first, - offset_first + 1, - handle.get_stream()); - } - new_vertices.resize(0, handle.get_stream()); - d_tmp_storage.resize(0, handle.get_stream()); - new_vertices.shrink_to_fit(handle.get_stream()); - d_tmp_storage.shrink_to_fit(handle.get_stream()); - - auto edgelist_label_indices = detail::expand_sparse_offsets( - std::get<1>(*label_offsets), label_index_t{0}, handle.get_stream()); - - auto pair_first = - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_label_indices.begin()); - thrust::transform( - handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - edgelist_srcs.begin(), - [renumber_map_label_offsets = raft::device_span( - (*renumber_map_label_offsets).data(), (*renumber_map_label_offsets).size()), - old_vertices = raft::device_span(segment_sorted_renumber_map.data(), - segment_sorted_renumber_map.size()), - new_vertices = raft::device_span( - segment_sorted_new_vertices.data(), - segment_sorted_new_vertices.size())] __device__(auto pair) { - auto old_vertex = thrust::get<0>(pair); - auto label_index = thrust::get<1>(pair); - auto label_start_offset = renumber_map_label_offsets[label_index]; - auto label_end_offset = renumber_map_label_offsets[label_index + 1]; - auto it = thrust::lower_bound(thrust::seq, - old_vertices.begin() + label_start_offset, - old_vertices.begin() + label_end_offset, - old_vertex); - assert(*it == old_vertex); - return *(new_vertices.begin() + thrust::distance(old_vertices.begin(), it)); - }); - - pair_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_label_indices.begin()); - thrust::transform( - handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_dsts.size(), - edgelist_dsts.begin(), - [renumber_map_label_offsets = raft::device_span( - (*renumber_map_label_offsets).data(), (*renumber_map_label_offsets).size()), - old_vertices = raft::device_span(segment_sorted_renumber_map.data(), - segment_sorted_renumber_map.size()), - new_vertices = raft::device_span( - segment_sorted_new_vertices.data(), - segment_sorted_new_vertices.size())] __device__(auto pair) { - auto old_vertex = thrust::get<0>(pair); - auto label_index = thrust::get<1>(pair); - auto label_start_offset = renumber_map_label_offsets[label_index]; - auto label_end_offset = renumber_map_label_offsets[label_index + 1]; - auto it = thrust::lower_bound(thrust::seq, - old_vertices.begin() + label_start_offset, - old_vertices.begin() + label_end_offset, - old_vertex); - assert(*it == old_vertex); - return new_vertices[thrust::distance(old_vertices.begin(), it)]; - }); - } else { - kv_store_t kv_store(renumber_map.begin(), - renumber_map.end(), - thrust::make_counting_iterator(vertex_t{0}), - std::numeric_limits::max(), - std::numeric_limits::max(), - handle.get_stream()); - auto kv_store_view = kv_store.view(); - - kv_store_view.find( - edgelist_srcs.begin(), edgelist_srcs.end(), edgelist_srcs.begin(), 
handle.get_stream()); - kv_store_view.find( - edgelist_dsts.begin(), edgelist_dsts.end(), edgelist_dsts.begin(), handle.get_stream()); - } - - return std::make_tuple(std::move(edgelist_srcs), - std::move(edgelist_dsts), - std::move(renumber_map), - std::move(renumber_map_label_offsets)); -} - -} // namespace cugraph diff --git a/cpp/src/sampling/renumber_sampled_edgelist_sg_v32_e32.cu b/cpp/src/sampling/renumber_sampled_edgelist_sg_v32_e32.cu deleted file mode 100644 index dee28c593ad..00000000000 --- a/cpp/src/sampling/renumber_sampled_edgelist_sg_v32_e32.cu +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "renumber_sampled_edgelist_impl.cuh" - -#include - -// FIXME: deprecated, to be deleted -namespace cugraph { - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - std::optional>> -renumber_sampled_edgelist( - raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional> edgelist_hops, - std::optional, raft::device_span>> - label_offsets, - bool do_expensive_check); - -} // namespace cugraph diff --git a/cpp/src/sampling/renumber_sampled_edgelist_sg_v64_e64.cu b/cpp/src/sampling/renumber_sampled_edgelist_sg_v64_e64.cu deleted file mode 100644 index 99293c68f0c..00000000000 --- a/cpp/src/sampling/renumber_sampled_edgelist_sg_v64_e64.cu +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
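The two .cu files deleted here existed only to explicitly instantiate the deprecated template for the 32-bit and 64-bit vertex/edge type combinations. A minimal sketch of that explicit-instantiation pattern (the function and file names are hypothetical; in the real tree the template body lives in an _impl.cuh header and each tiny .cu is its own translation unit, which keeps per-file compile times short):

// renumber_impl.cuh: template definition, included by every instantiation file
template <typename vertex_t>
vertex_t add_one(vertex_t v) { return v + 1; }

// renumber_sg_v32.cu: explicit instantiation for 32-bit vertex IDs
template int add_one<int>(int);

// renumber_sg_v64.cu: explicit instantiation for 64-bit vertex IDs
template long add_one<long>(long);

int main() { return add_one<int>(41) == 42 ? 0 : 1; }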
- */ - -#include "renumber_sampled_edgelist_impl.cuh" - -#include - -// FIXME: deprecated, to be deleted -namespace cugraph { - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - std::optional>> -renumber_sampled_edgelist( - raft::handle_t const& handle, - rmm::device_uvector&& edgelist_srcs, - rmm::device_uvector&& edgelist_dsts, - std::optional> edgelist_hops, - std::optional, raft::device_span>> - label_offsets, - bool do_expensive_check); - -} // namespace cugraph diff --git a/cpp/src/sampling/rw_traversals.hpp b/cpp/src/sampling/rw_traversals.hpp index 45cc1e54cb4..2c5658b32a5 100644 --- a/cpp/src/sampling/rw_traversals.hpp +++ b/cpp/src/sampling/rw_traversals.hpp @@ -18,8 +18,6 @@ // #pragma once -#include "utilities/graph_utils.cuh" - #include #include diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index b0b3bb5f4f2..4624e6d4a5e 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -49,9 +49,10 @@ namespace cugraph { namespace { -template +template struct edge_order_t { thrust::optional> edgelist_label_offsets{thrust::nullopt}; + thrust::optional> edgelist_edge_types{thrust::nullopt}; thrust::optional> edgelist_hops{thrust::nullopt}; raft::device_span edgelist_majors{}; raft::device_span edgelist_minors{}; @@ -72,6 +73,12 @@ struct edge_order_t { if (l_label != r_label) { return l_label < r_label; } } + if (edgelist_edge_types) { + auto l_type = (*edgelist_edge_types)[l_idx]; + auto r_type = (*edgelist_edge_types)[r_idx]; + if (l_type != r_type) { return l_type < r_type; } + } + if (edgelist_hops) { auto l_hop = (*edgelist_hops)[l_idx]; auto r_hop = (*edgelist_hops)[r_idx]; @@ -151,6 +158,7 @@ struct optionally_compute_label_index_t { template @@ -164,8 +172,11 @@ void check_input_edges(raft::handle_t const& handle, std::optional> seed_vertices, std::optional> seed_vertex_label_offsets, std::optional> edgelist_label_offsets, + std::optional> vertex_type_offsets, size_t num_labels, size_t num_hops, + size_t num_vertex_types, + std::optional num_edge_types, bool do_expensive_check) { CUGRAPH_EXPECTS( @@ -193,6 +204,7 @@ void check_input_edges(raft::handle_t const& handle, "(size of the offset array) should be num_labels + 1."); if (edgelist_majors.size() > 0) { + static_assert(std::is_same_v); CUGRAPH_EXPECTS((num_labels >= 1) && (num_labels <= std::numeric_limits::max()), "Invalid input arguments: num_labels should be a positive integer and the " "current implementation assumes that the number of unique labels is no larger " @@ -209,13 +221,16 @@ void check_input_edges(raft::handle_t const& handle, CUGRAPH_EXPECTS( (num_hops == 1) || edgelist_hops.has_value(), "Invalid input arguments: edgelist_hops.has_value() should be true if num_hops >= 2."); - } else { - CUGRAPH_EXPECTS( - "num_labels == 0", - "Invalid input arguments: num_labels should be 0 if the input edge list is empty."); + + static_assert(std::is_same_v); CUGRAPH_EXPECTS( - "num_hops == 0", - "Invalid input arguments: num_hops should be 0 if the input edge list is empty."); + (num_vertex_types >= 1) && (num_vertex_types <= std::numeric_limits::max()), + "Invalid input arguments: num_vertex_types should be a positive integer and the " + "current implementation assumes that the number of vertex types is no larger " + "than std::numeric_limits::max()."); + CUGRAPH_EXPECTS((num_vertex_types == 1) || vertex_type_offsets.has_value(), + "Invalid input arguments: 
vertex_type_offsets.has_value() should be true if " + "num_vertex_types >= 2."); } CUGRAPH_EXPECTS((!seed_vertices.has_value() && !seed_vertex_label_offsets.has_value()) || @@ -257,6 +272,174 @@ void check_input_edges(raft::handle_t const& handle, "*edgelist_label_offsets and edgelist_(srcs|dsts).size() should coincide."); } + if (edgelist_edge_types && num_edge_types) { + CUGRAPH_EXPECTS( + thrust::count_if(handle.get_thrust_policy(), + (*edgelist_edge_types).begin(), + (*edgelist_edge_types).end(), + [num_edge_types = static_cast(*num_edge_types)] __device__( + edge_type_t edge_type) { return edge_type >= num_edge_types; }) == 0, + "Invalid input arguments: edgelist_edge_type is valid but contains out-of-range edge type " + "values."); + if constexpr (std::is_signed_v) { + CUGRAPH_EXPECTS(thrust::count_if(handle.get_thrust_policy(), + (*edgelist_edge_types).begin(), + (*edgelist_edge_types).end(), + [] __device__(edge_type_t edge_type) { + return edge_type < edge_type_t{0}; + }) == 0, + "Invalid input arguments: edgelist_edge_type is valid but contains " + "negative edge type values."); + } + } + + if (vertex_type_offsets) { + CUGRAPH_EXPECTS( + thrust::is_sorted( + handle.get_thrust_policy(), (*vertex_type_offsets).begin(), (*vertex_type_offsets).end()), + "Invalid input arguments: if vertex_type_offsets is valid, " + "*vertex_type_offsets should be sorted."); + vertex_t front_element{}; + raft::update_host( + &front_element, (*vertex_type_offsets).data(), size_t{1}, handle.get_stream()); + vertex_t back_element{}; + raft::update_host(&back_element, + (*vertex_type_offsets).data() + num_vertex_types, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + CUGRAPH_EXPECTS( + front_element == vertex_t{0}, + "Invalid input arguments: if vertex_type_offsets is valid, the first element of " + "*vertex_type_offsets should be 0."); + vertex_t max_v = std::max(thrust::reduce(handle.get_thrust_policy(), + edgelist_majors.begin(), + edgelist_majors.end(), + vertex_t{0}, + thrust::maximum{}), + thrust::reduce(handle.get_thrust_policy(), + edgelist_minors.begin(), + edgelist_minors.end(), + vertex_t{0}, + thrust::maximum{})); + CUGRAPH_EXPECTS( + back_element > max_v, + "Invalid input arguments: if vertex_type_offsets is valid, the last element of " + "*vertex_type_offsets should be larger than the maximum vertex ID in edgelist_majors & " + "edgelist_minors."); + + rmm::device_uvector tmp_majors(edgelist_majors.size(), handle.get_stream()); + rmm::device_uvector tmp_minors(edgelist_minors.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + edgelist_majors.begin(), + edgelist_majors.end(), + tmp_majors.begin()); + thrust::copy(handle.get_thrust_policy(), + edgelist_minors.begin(), + edgelist_minors.end(), + tmp_minors.begin()); + if (edgelist_edge_types) { + rmm::device_uvector tmp_edge_types((*edgelist_edge_types).size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*edgelist_edge_types).begin(), + (*edgelist_edge_types).end(), + tmp_edge_types.begin()); + auto triplet_first = + thrust::make_zip_iterator(tmp_edge_types.begin(), tmp_majors.begin(), tmp_minors.begin()); + thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + tmp_majors.size()); + CUGRAPH_EXPECTS( + thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(tmp_majors.size()), + [vertex_type_offsets = *vertex_type_offsets, triplet_first] __device__(size_t i) { + if (i > 0) { + auto prev = 
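A host-side sketch of the vertex_type_offsets sanity checks being added here, with std::vector stand-ins for the device spans: the offsets must be sorted, start at zero, and end past the largest vertex ID referenced by the edge list.

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> vertex_type_offsets{0, 4, 9};
  std::vector<int> edgelist_majors{1, 5, 8};
  std::vector<int> edgelist_minors{0, 4, 2};

  assert(std::is_sorted(vertex_type_offsets.begin(), vertex_type_offsets.end()));
  assert(vertex_type_offsets.front() == 0);

  int max_v = std::max(*std::max_element(edgelist_majors.begin(), edgelist_majors.end()),
                       *std::max_element(edgelist_minors.begin(), edgelist_minors.end()));
  assert(vertex_type_offsets.back() > max_v);  // every vertex ID falls inside some type range
  return 0;
}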
*(triplet_first + i - 1); + auto cur = *(triplet_first + i); + if (thrust::get<0>(prev) == thrust::get<0>(cur)) { // same edge type + auto prev_major_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<1>(prev))); + auto cur_major_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<1>(cur))); + if (prev_major_v_type != cur_major_v_type) { return true; } + auto prev_minor_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<2>(prev))); + auto cur_minor_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<2>(cur))); + if (prev_minor_v_type != cur_minor_v_type) { return true; } + } + } + return false; + }) == 0, + "Invalid input arguments: if vertex_type_offsets and edgelist_edge_types are valid, the " + "entire set of input edge source vertices for each edge type should have an identical " + "vertex type, and the entire set of input edge destination vertices for each type should " + "have an identical vertex type."); + } else { + auto pair_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minors.begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + tmp_majors.size()); + CUGRAPH_EXPECTS( + thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(tmp_majors.size()), + [vertex_type_offsets = *vertex_type_offsets, pair_first] __device__(size_t i) { + if (i > 0) { + auto prev = *(pair_first + i - 1); + auto cur = *(pair_first + i); + auto prev_src_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<0>(prev))); + auto cur_src_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<0>(cur))); + if (prev_src_v_type != cur_src_v_type) { return true; } + auto prev_dst_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<1>(prev))); + auto cur_dst_v_type = + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<1>(cur))); + if (prev_dst_v_type != cur_dst_v_type) { return true; } + } + return false; + }) == 0, + "Invalid input arguments: if vertex_type_offsets is valid (but " + "edgelist_edge_types is invalid), the entire set of input edge source " + "vertices should have an identical vertex type, and the entire set of " + "input edge destination vertices should have an identical vertex type."); + } + } + if (seed_vertices) { for (size_t i = 0; i < num_labels; ++i) { rmm::device_uvector this_label_seed_vertices(0, handle.get_stream()); @@ -356,7 +539,7 @@ compute_min_hop_for_unique_label_vertex_pairs( std::optional> seed_vertex_label_offsets, std::optional> edgelist_label_offsets) { - auto approx_edges_to_sort_per_iteration = + auto approx_items_to_sort_per_iteration 
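These checks, and nearly every hunk that follows, derive a vertex's type by binary-searching the offsets array past its leading zero. A host-side sketch of that idiom: with offsets {0, 4, 9}, vertices [0, 4) are type 0 and [4, 9) are type 1.

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

int main() {
  using vertex_t = int;
  std::vector<vertex_t> vertex_type_offsets{0, 4, 9};  // num_vertex_types + 1 entries

  auto vertex_type = [&](vertex_t v) {
    // skip the leading 0; the first offset strictly greater than v marks v's bucket
    return static_cast<int>(std::distance(
      vertex_type_offsets.begin() + 1,
      std::upper_bound(vertex_type_offsets.begin() + 1, vertex_type_offsets.end(), v)));
  };

  assert(vertex_type(0) == 0);
  assert(vertex_type(3) == 0);
  assert(vertex_type(4) == 1);
  assert(vertex_type(8) == 1);
  return 0;
}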
= static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 18) /* tuning parameter */; // for segmented sort @@ -369,7 +552,7 @@ compute_min_hop_for_unique_label_vertex_pairs( detail::compute_offset_aligned_element_chunks(handle, *edgelist_label_offsets, edgelist_vertices.size(), - approx_edges_to_sort_per_iteration); + approx_items_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; if (edgelist_hops) { @@ -406,28 +589,28 @@ compute_min_hop_for_unique_label_vertex_pairs( } tmp_indices.resize( - thrust::distance( - tmp_indices.begin(), - thrust::unique(handle.get_thrust_policy(), - tmp_indices.begin(), - tmp_indices.end(), - [edgelist_label_offsets = *edgelist_label_offsets, - edgelist_vertices] __device__(size_t l_idx, size_t r_idx) { - auto l_it = thrust::upper_bound(thrust::seq, - edgelist_label_offsets.begin() + 1, - edgelist_label_offsets.end(), - l_idx); - auto r_it = thrust::upper_bound(thrust::seq, - edgelist_label_offsets.begin() + 1, - edgelist_label_offsets.end(), - r_idx); - if (l_it != r_it) { return false; } - - auto l_vertex = edgelist_vertices[l_idx]; - auto r_vertex = edgelist_vertices[r_idx]; - return l_vertex == r_vertex; - })), + thrust::distance(tmp_indices.begin(), + thrust::unique(handle.get_thrust_policy(), + tmp_indices.begin(), + tmp_indices.end(), + [edgelist_label_offsets = *edgelist_label_offsets, + edgelist_vertices] __device__(size_t l_idx, size_t r_idx) { + auto l_it = + thrust::upper_bound(thrust::seq, + edgelist_label_offsets.begin() + 1, + edgelist_label_offsets.end(), + l_idx); + auto r_it = + thrust::upper_bound(thrust::seq, + edgelist_label_offsets.begin() + 1, + edgelist_label_offsets.end(), + r_idx); + if (l_it != r_it) { return false; } + + auto l_vertex = edgelist_vertices[l_idx]; + auto r_vertex = edgelist_vertices[r_idx]; + return l_vertex == r_vertex; + })), handle.get_stream()); tmp_label_indices.resize(tmp_indices.size(), handle.get_stream()); @@ -859,17 +1042,23 @@ compute_min_hop_for_unique_label_vertex_pairs( } } -template -std::tuple, std::optional>> -compute_renumber_map(raft::handle_t const& handle, - raft::device_span edgelist_majors, - raft::device_span edgelist_minors, - std::optional> edgelist_hops, - std::optional> seed_vertices, - std::optional> seed_vertex_label_offsets, - std::optional> edgelist_label_offsets) +// returns renumber map & optional (label, type) offsets +// indices are non-descending +template +std::tuple, std::optional>> +compute_vertex_renumber_map( + raft::handle_t const& handle, + raft::device_span edgelist_majors, + raft::device_span edgelist_minors, + std::optional> edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + std::optional> vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types) { - auto approx_edges_to_sort_per_iteration = + auto approx_items_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort @@ -892,10 +1081,9 @@ compute_renumber_map(raft::handle_t const& handle, compute_min_hop_for_unique_label_vertex_pairs( handle, edgelist_minors, edgelist_hops, std::nullopt, std::nullopt, edgelist_label_offsets); + rmm::device_uvector renumber_map(0, handle.get_stream()); + std::optional> renumber_map_label_type_offsets{std::nullopt}; if (edgelist_label_offsets) { - auto num_labels = (*edgelist_label_offsets).size() - 1; - - rmm::device_uvector renumber_map(0,
handle.get_stream()); rmm::device_uvector renumber_map_label_indices(0, handle.get_stream()); renumber_map.reserve((*unique_label_major_pair_label_indices).size() + @@ -903,8 +1091,8 @@ compute_renumber_map(raft::handle_t const& handle, handle.get_stream()); renumber_map_label_indices.reserve(renumber_map.capacity(), handle.get_stream()); - auto num_chunks = (edgelist_majors.size() + (approx_edges_to_sort_per_iteration - 1)) / - approx_edges_to_sort_per_iteration; + auto num_chunks = (edgelist_majors.size() + (approx_items_to_sort_per_iteration - 1)) / + approx_items_to_sort_per_iteration; auto chunk_size = (num_chunks > 0) ? ((num_labels + (num_chunks - 1)) / num_chunks) : 0; size_t copy_offset{0}; @@ -963,12 +1151,37 @@ compute_renumber_map(raft::handle_t const& handle, merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); merged_hops.resize(merged_label_indices.size(), handle.get_stream()); merged_flags.resize(merged_label_indices.size(), handle.get_stream()); - auto sort_key_first = thrust::make_zip_iterator( - merged_label_indices.begin(), merged_hops.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_label_indices.size(), - merged_vertices.begin()); + if (vertex_type_offsets) { + auto quadraplet_first = thrust::make_zip_iterator(merged_label_indices.begin(), + merged_vertices.begin(), + merged_hops.begin(), + merged_flags.begin()); + thrust::sort( + handle.get_thrust_policy(), + quadraplet_first, + quadraplet_first + merged_vertices.size(), + [offsets = *vertex_type_offsets] __device__(auto lhs, auto rhs) { + auto lhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<1>(lhs))); + auto rhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<1>(rhs))); + return thrust::make_tuple( + thrust::get<0>(lhs), lhs_v_type, thrust::get<2>(lhs), thrust::get<3>(lhs)) < + thrust::make_tuple( + thrust::get<0>(rhs), rhs_v_type, thrust::get<2>(rhs), thrust::get<3>(rhs)); + }); + } else { + auto sort_key_first = thrust::make_zip_iterator( + merged_label_indices.begin(), merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_label_indices.size(), + merged_vertices.begin()); + } } else { auto major_triplet_first = thrust::make_zip_iterator((*unique_label_major_pair_label_indices).begin(), @@ -999,12 +1212,33 @@ compute_renumber_map(raft::handle_t const& handle, handle.get_stream()); merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); merged_flags.resize(merged_label_indices.size(), handle.get_stream()); - auto sort_key_first = - thrust::make_zip_iterator(merged_label_indices.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_label_indices.size(), - merged_vertices.begin()); + if (vertex_type_offsets) { + auto triplet_first = thrust::make_zip_iterator( + merged_label_indices.begin(), merged_vertices.begin(), merged_flags.begin()); + thrust::sort( + handle.get_thrust_policy(), + triplet_first, + triplet_first + merged_vertices.size(), + [offsets = *vertex_type_offsets] __device__(auto lhs, auto rhs) { + auto lhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<1>(lhs))); + auto 
rhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<1>(rhs))); + return thrust::make_tuple(thrust::get<0>(lhs), lhs_v_type, thrust::get<2>(lhs)) < + thrust::make_tuple(thrust::get<0>(rhs), rhs_v_type, thrust::get<2>(rhs)); + }); + } else { + auto sort_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_label_indices.size(), + merged_vertices.begin()); + } } renumber_map.resize(copy_offset + merged_vertices.size(), handle.get_stream()); @@ -1025,7 +1259,41 @@ compute_renumber_map(raft::handle_t const& handle, renumber_map.shrink_to_fit(handle.get_stream()); renumber_map_label_indices.shrink_to_fit(handle.get_stream()); - return std::make_tuple(std::move(renumber_map), std::move(renumber_map_label_indices)); + renumber_map_label_type_offsets = + rmm::device_uvector(num_labels * num_vertex_types + 1, handle.get_stream()); + (*renumber_map_label_type_offsets).set_element_to_zero_async(0, handle.get_stream()); + if (vertex_type_offsets) { + auto label_type_pair_first = thrust::make_zip_iterator( + renumber_map_label_indices.begin(), + thrust::make_transform_iterator( + renumber_map.begin(), + cuda::proclaim_return_type( + [offsets = *vertex_type_offsets] __device__(auto v) { + return static_cast(thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), v))); + }))); + auto value_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type>( + [num_vertex_types] __device__(size_t i) { + return thrust::make_tuple(static_cast(i / num_vertex_types), + static_cast(i % num_vertex_types)); + })); + thrust::upper_bound(handle.get_thrust_policy(), + label_type_pair_first, + label_type_pair_first + renumber_map.size(), + value_first, + value_first + (num_labels * num_vertex_types), + (*renumber_map_label_type_offsets).begin() + 1); + } else { + thrust::upper_bound(handle.get_thrust_policy(), + renumber_map_label_indices.begin(), + renumber_map_label_indices.end(), + thrust::make_counting_iterator(label_index_t{0}), + thrust::make_counting_iterator(static_cast(num_labels)), + (*renumber_map_label_type_offsets).begin() + 1); + } } else { if (edgelist_hops) { rmm::device_uvector merged_vertices( @@ -1067,13 +1335,34 @@ compute_renumber_map(raft::handle_t const& handle, merged_hops.resize(merged_vertices.size(), handle.get_stream()); merged_flags.resize(merged_vertices.size(), handle.get_stream()); - auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - merged_vertices.begin()); + if (vertex_type_offsets) { + auto triplet_first = thrust::make_zip_iterator( + merged_vertices.begin(), merged_hops.begin(), merged_flags.begin()); + thrust::sort( + handle.get_thrust_policy(), + triplet_first, + triplet_first + merged_vertices.size(), + [offsets = *vertex_type_offsets] __device__(auto lhs, auto rhs) { + auto lhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<0>(lhs))); + auto rhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<0>(rhs))); + return 
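A host-side sketch of how renumber_map_label_type_offsets is derived here: once the renumber map is sorted by (label, vertex type), the end offset of each flattened bucket (label * num_vertex_types + type) is a single upper_bound over the sorted keys, which is what the zip/transform-iterator thrust::upper_bound above vectorizes.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  size_t num_labels = 2, num_types = 2;
  // (label, type) key per renumber-map entry, already sorted lexicographically
  std::vector<std::pair<int, int>> keys{{0, 0}, {0, 0}, {0, 1}, {1, 1}, {1, 1}};

  std::vector<size_t> offsets(num_labels * num_types + 1, 0);
  for (size_t i = 0; i < num_labels * num_types; ++i) {
    std::pair<int, int> bucket{static_cast<int>(i / num_types),
                               static_cast<int>(i % num_types)};
    offsets[i + 1] =
      std::upper_bound(keys.begin(), keys.end(), bucket) - keys.begin();
  }
  for (auto o : offsets) { std::cout << o << ' '; }  // 0 2 3 3 5 (empty buckets repeat)
  return 0;
}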
thrust::make_tuple(lhs_v_type, thrust::get<1>(lhs), thrust::get<2>(lhs)) < + thrust::make_tuple(rhs_v_type, thrust::get<1>(rhs), thrust::get<2>(rhs)); + }); + } else { + auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + merged_vertices.begin()); + } - return std::make_tuple(std::move(merged_vertices), std::nullopt); + renumber_map = std::move(merged_vertices); } else { rmm::device_uvector output_vertices(unique_label_minor_pair_vertices.size(), handle.get_stream()); @@ -1085,7 +1374,7 @@ compute_renumber_map(raft::handle_t const& handle, output_vertices.begin()); auto num_unique_majors = unique_label_major_pair_vertices.size(); - auto renumber_map = std::move(unique_label_major_pair_vertices); + renumber_map = std::move(unique_label_major_pair_vertices); renumber_map.resize( renumber_map.size() + thrust::distance(output_vertices.begin(), output_last), handle.get_stream()); @@ -1094,9 +1383,370 @@ compute_renumber_map(raft::handle_t const& handle, output_last, renumber_map.begin() + num_unique_majors); - return std::make_tuple(std::move(renumber_map), std::nullopt); + if (vertex_type_offsets) { + thrust::stable_sort( + handle.get_thrust_policy(), + renumber_map.begin(), + renumber_map.end(), + [offsets = *vertex_type_offsets] __device__(auto lhs, auto rhs) { + auto lhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<0>(lhs))); + auto rhs_v_type = thrust::distance( + offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, offsets.begin() + 1, offsets.end(), thrust::get<0>(rhs))); + return lhs_v_type < rhs_v_type; + }); + } + } + + if (vertex_type_offsets) { + renumber_map_label_type_offsets = + rmm::device_uvector(num_vertex_types + 1, handle.get_stream()); + (*renumber_map_label_type_offsets).set_element_to_zero_async(0, handle.get_stream()); + auto type_first = thrust::make_transform_iterator( + renumber_map.begin(), + cuda::proclaim_return_type( + [offsets = *vertex_type_offsets] __device__(auto v) { + return static_cast(thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), v))); + })); + thrust::upper_bound( + handle.get_thrust_policy(), + type_first, + type_first + renumber_map.size(), + thrust::make_counting_iterator(vertex_type_t{0}), + thrust::make_counting_iterator(static_cast(num_vertex_types)), + (*renumber_map_label_type_offsets).begin() + 1); + } + } + + return std::make_tuple(std::move(renumber_map), std::move(renumber_map_label_type_offsets)); +} + +// returns renumber map & optional (label, type) offsets +template +std::tuple, std::optional>> +compute_edge_id_renumber_map( + raft::handle_t const& handle, + raft::device_span edgelist_edge_ids, + std::optional> edgelist_edge_types, + std::optional> edgelist_hops, + std::optional> edgelist_label_offsets, + size_t num_labels, + size_t num_edge_types) +{ + rmm::device_uvector renumber_map(0, handle.get_stream()); + std::optional> renumber_map_label_type_offsets{std::nullopt}; + if (edgelist_label_offsets) { + auto approx_items_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for segmented sort + + auto [h_label_offsets, h_edge_offsets] = + detail::compute_offset_aligned_element_chunks(handle, + *edgelist_label_offsets, + edgelist_edge_ids.size(), + 
approx_items_to_sort_per_iteration); + auto num_chunks = h_label_offsets.size() - 1; + + rmm::device_uvector tmp_indices(edgelist_edge_ids.size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), tmp_indices.begin(), tmp_indices.end(), size_t{0}); + + // cub::DeviceSegmentedSort currently does not support thrust::tuple type keys; sorting in + chunks still helps in limiting the binary search range and improving memory locality + for (size_t i = 0; i < num_chunks; ++i) { + // sort by (label, (type), id, (hop)) + + thrust::sort( + handle.get_thrust_policy(), + tmp_indices.begin() + h_edge_offsets[i], + tmp_indices.begin() + h_edge_offsets[i + 1], + [edgelist_label_offsets = + raft::device_span((*edgelist_label_offsets).data() + h_label_offsets[i], + (h_label_offsets[i + 1] - h_label_offsets[i]) + 1), + edgelist_edge_types = detail::to_thrust_optional(edgelist_edge_types), + edgelist_edge_ids, + edgelist_hops = detail::to_thrust_optional(edgelist_hops)] __device__(size_t l_idx, + size_t r_idx) { + auto l_it = thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), l_idx); + auto r_it = thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), r_idx); + if (l_it != r_it) { return l_it < r_it; } + + if (edgelist_edge_types) { + auto l_type = (*edgelist_edge_types)[l_idx]; + auto r_type = (*edgelist_edge_types)[r_idx]; + if (l_type != r_type) { return l_type < r_type; } + } + + auto l_id = edgelist_edge_ids[l_idx]; + auto r_id = edgelist_edge_ids[r_idx]; + if (l_id != r_id) { return l_id < r_id; } + + if (edgelist_hops) { + auto l_hop = (*edgelist_hops)[l_idx]; + auto r_hop = (*edgelist_hops)[r_idx]; + return l_hop < r_hop; + } + + return false; + }); + + // find unique (label, (type), id, (min_hop)) tuples + + auto last = thrust::unique( + handle.get_thrust_policy(), + tmp_indices.begin() + h_edge_offsets[i], + tmp_indices.begin() + h_edge_offsets[i + 1], + [edgelist_label_offsets = *edgelist_label_offsets, + edgelist_edge_types = detail::to_thrust_optional(edgelist_edge_types), + edgelist_edge_ids] __device__(size_t l_idx, size_t r_idx) { + auto l_it = thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), l_idx); + auto r_it = thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), r_idx); + if (l_it != r_it) { return false; } + + if (edgelist_edge_types) { + auto l_type = (*edgelist_edge_types)[l_idx]; + auto r_type = (*edgelist_edge_types)[r_idx]; + if (l_type != r_type) { return false; } + } + + auto l_id = edgelist_edge_ids[l_idx]; + auto r_id = edgelist_edge_ids[r_idx]; + return l_id == r_id; + }); + + // sort by (label, (type), (min_hop), id) + + if (edgelist_hops) { + thrust::sort( + handle.get_thrust_policy(), + tmp_indices.begin() + h_edge_offsets[i], + last, + [edgelist_label_offsets = + raft::device_span((*edgelist_label_offsets).data() + h_label_offsets[i], + (h_label_offsets[i + 1] - h_label_offsets[i]) + 1), + edgelist_edge_types = detail::to_thrust_optional(edgelist_edge_types), + edgelist_edge_ids, + edgelist_hops = detail::to_thrust_optional(edgelist_hops)] __device__(size_t l_idx, + size_t r_idx) { + auto l_it = thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), l_idx); + auto r_it = thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), r_idx); + if (l_it != r_it) { return
l_it < r_it; } + + if (edgelist_edge_types) { + auto l_type = (*edgelist_edge_types)[l_idx]; + auto r_type = (*edgelist_edge_types)[r_idx]; + if (l_type != r_type) { return l_type < r_type; } + } + + if (edgelist_hops) { + auto l_hop = (*edgelist_hops)[l_idx]; + auto r_hop = (*edgelist_hops)[r_idx]; + return l_hop < r_hop; + } + + auto l_id = edgelist_edge_ids[l_idx]; + auto r_id = edgelist_edge_ids[r_idx]; + if (l_id != r_id) { return l_id < r_id; } + + return false; + }); + } + + // mark invalid indices + + thrust::fill(handle.get_thrust_policy(), + last, + tmp_indices.begin() + h_edge_offsets[i + 1], + std::numeric_limits::max()); + } + + tmp_indices.resize(thrust::distance(tmp_indices.begin(), + thrust::remove(handle.get_thrust_policy(), + tmp_indices.begin(), + tmp_indices.end(), + std::numeric_limits::max())), + handle.get_stream()); + + renumber_map = rmm::device_uvector(tmp_indices.size(), handle.get_stream()); + thrust::gather(handle.get_thrust_policy(), + tmp_indices.begin(), + tmp_indices.end(), + edgelist_edge_ids.begin(), + renumber_map.begin()); + + renumber_map_label_type_offsets = + rmm::device_uvector(num_labels * num_edge_types + 1, handle.get_stream()); + (*renumber_map_label_type_offsets).set_element_to_zero_async(0, handle.get_stream()); + if (edgelist_edge_types) { + auto label_type_pair_first = thrust::make_transform_iterator( + tmp_indices.begin(), + cuda::proclaim_return_type>( + [edgelist_label_offsets = *edgelist_label_offsets, + edgelist_edge_types = *edgelist_edge_types] __device__(size_t i) { + auto label_idx = thrust::distance( + edgelist_label_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), i)); + return thrust::make_tuple(static_cast(label_idx), + edgelist_edge_types[i]); + })); + auto value_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type>( + [num_edge_types] __device__(size_t i) { + return thrust::make_tuple(static_cast(i / num_edge_types), + static_cast(i % num_edge_types)); + })); + thrust::upper_bound(handle.get_thrust_policy(), + label_type_pair_first, + label_type_pair_first + renumber_map.size(), + value_first, + value_first + (num_labels * num_edge_types), + (*renumber_map_label_type_offsets).begin() + 1); + } else { + auto label_first = thrust::make_transform_iterator( + tmp_indices.begin(), + cuda::proclaim_return_type( + [edgelist_label_offsets = *edgelist_label_offsets] __device__(size_t i) { + auto label_idx = thrust::distance( + edgelist_label_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), i)); + return static_cast(label_idx); + })); + auto value_first = thrust::make_counting_iterator(label_index_t{0}); + thrust::upper_bound(handle.get_thrust_policy(), + label_first, + label_first + renumber_map.size(), + value_first, + value_first + num_labels, + (*renumber_map_label_type_offsets).begin() + 1); + } + } else { + // copy + + std::optional> tmp_types{std::nullopt}; + if (edgelist_edge_types) { + tmp_types = + rmm::device_uvector((*edgelist_edge_types).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*edgelist_edge_types).begin(), + (*edgelist_edge_types).end(), + (*tmp_types).begin()); + } + rmm::device_uvector tmp_ids(edgelist_edge_ids.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + edgelist_edge_ids.begin(), + edgelist_edge_ids.end(), + tmp_ids.begin()); + std::optional> 
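A host-side sketch of the sentinel trick used just above: per-chunk leftovers past the unique range are overwritten with the maximum representable index, so a single remove() pass afterwards compacts all chunks at once.

#include <algorithm>
#include <cassert>
#include <limits>
#include <vector>

int main() {
  constexpr size_t kInvalid = std::numeric_limits<size_t>::max();
  std::vector<size_t> indices{0, 2, kInvalid, 5, kInvalid};  // two dead slots from two chunks

  indices.erase(std::remove(indices.begin(), indices.end(), kInvalid),
                indices.end());

  assert((indices == std::vector<size_t>{0, 2, 5}));
  return 0;
}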
tmp_hops{std::nullopt}; + if (edgelist_hops) { + tmp_hops = rmm::device_uvector((*edgelist_hops).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*edgelist_hops).begin(), + (*edgelist_hops).end(), + (*tmp_hops).begin()); + } + + // sort by ((type), id, (hop)) + + if (tmp_types) { + if (tmp_hops) { + auto triplet_first = + thrust::make_zip_iterator((*tmp_types).begin(), tmp_ids.begin(), (*tmp_hops).begin()); + thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + tmp_ids.size()); + } else { + auto pair_first = thrust::make_zip_iterator((*tmp_types).begin(), tmp_ids.begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + tmp_ids.size()); + } + } else { + if (tmp_hops) { + auto pair_first = thrust::make_zip_iterator(tmp_ids.begin(), (*tmp_hops).begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + tmp_ids.size()); + } else { + thrust::sort(handle.get_thrust_policy(), tmp_ids.begin(), tmp_ids.end()); + } + } + + // find unique ((type), id, (min_hop)) tuples + + if (tmp_types) { + auto pair_first = thrust::make_zip_iterator((*tmp_types).begin(), tmp_ids.begin()); + if (tmp_hops) { + tmp_ids.resize( + thrust::distance(pair_first, + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + pair_first, + pair_first + tmp_ids.size(), + (*tmp_hops).begin()))), + handle.get_stream()); + (*tmp_hops).resize(tmp_ids.size(), handle.get_stream()); + } else { + tmp_ids.resize( + thrust::distance( + pair_first, + thrust::unique(handle.get_thrust_policy(), pair_first, pair_first + tmp_ids.size())), + handle.get_stream()); + } + (*tmp_types).resize(tmp_ids.size(), handle.get_stream()); + } else { + if (tmp_hops) { + tmp_ids.resize( + thrust::distance( + tmp_ids.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), tmp_ids.begin(), tmp_ids.end(), (*tmp_hops).begin()))), + handle.get_stream()); + (*tmp_hops).resize(tmp_ids.size(), handle.get_stream()); + } else { + tmp_ids.resize( + thrust::distance( + tmp_ids.begin(), + thrust::unique(handle.get_thrust_policy(), tmp_ids.begin(), tmp_ids.end())), + handle.get_stream()); + } + } + + // sort by ((type), (min_hop), id) + + if (tmp_hops) { + if (tmp_types) { + auto triplet_first = + thrust::make_zip_iterator((*tmp_types).begin(), (*tmp_hops).begin(), tmp_ids.begin()); + thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + tmp_ids.size()); + } else { + auto pair_first = thrust::make_zip_iterator((*tmp_hops).begin(), tmp_ids.begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + tmp_ids.size()); + } + } + + renumber_map = std::move(tmp_ids); + + if (tmp_types) { + renumber_map_label_type_offsets = + rmm::device_uvector(num_edge_types + 1, handle.get_stream()); + (*renumber_map_label_type_offsets).set_element_to_zero_async(0, handle.get_stream()); + thrust::upper_bound(handle.get_thrust_policy(), + (*tmp_types).begin(), + (*tmp_types).end(), + thrust::make_counting_iterator(edge_type_t{0}), + thrust::make_counting_iterator(static_cast(num_edge_types)), + (*renumber_map_label_type_offsets).begin() + 1); } } + + return std::make_tuple(std::move(renumber_map), std::move(renumber_map_label_type_offsets)); } // this function does not reorder edges (the i'th returned edge is the renumbered output of the @@ -1117,74 +1767,45 @@ renumber_sampled_edgelist(raft::handle_t const& handle, size_t num_labels, bool do_expensive_check) { - // 1. 
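A host-side sketch of the sort/unique sequence this branch performs: sorting with hop as the last key puts the minimum-hop occurrence first within each (type, id) group, so a unique() that compares only (type, id) keeps exactly that minimum-hop copy.

#include <algorithm>
#include <cassert>
#include <tuple>
#include <vector>

int main() {
  // (edge type, edge id, hop)
  std::vector<std::tuple<int, long, int>> edges{
    {0, 7, 2}, {0, 7, 1}, {1, 7, 3}, {0, 3, 0}};

  std::sort(edges.begin(), edges.end());  // (type, id, hop) ascending
  auto last = std::unique(edges.begin(), edges.end(),
                          [](auto const& l, auto const& r) {
                            return std::get<0>(l) == std::get<0>(r) &&
                                   std::get<1>(l) == std::get<1>(r);  // hop ignored
                          });
  edges.erase(last, edges.end());

  // kept: (0,3,0), (0,7,1), (1,7,3) -- the minimum hop survives per (type, id)
  assert(edges.size() == 3 && std::get<2>(edges[1]) == 1);
  return 0;
}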
compute renumber_map + using vertex_type_t = uint32_t; // dummy - auto [renumber_map, renumber_map_label_indices] = compute_renumber_map( - handle, - raft::device_span(edgelist_majors.data(), edgelist_majors.size()), - raft::device_span(edgelist_minors.data(), edgelist_minors.size()), - edgelist_hops, - seed_vertices ? std::make_optional>((*seed_vertices).data(), - (*seed_vertices).size()) - : std::nullopt, - seed_vertex_label_offsets, - edgelist_label_offsets); - - // 2. compute renumber map offsets for each label + // 1. compute renumber_map - std::optional> renumber_map_label_offsets{}; - if (edgelist_label_offsets) { - auto num_unique_labels = thrust::count_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator((*renumber_map_label_indices).size()), - detail::is_first_in_run_t{(*renumber_map_label_indices).data()}); - rmm::device_uvector unique_label_indices(num_unique_labels, handle.get_stream()); - rmm::device_uvector vertex_counts(num_unique_labels, handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - (*renumber_map_label_indices).begin(), - (*renumber_map_label_indices).end(), - thrust::make_constant_iterator(size_t{1}), - unique_label_indices.begin(), - vertex_counts.begin()); - - renumber_map_label_offsets = rmm::device_uvector(num_labels + 1, handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - (*renumber_map_label_offsets).begin(), - (*renumber_map_label_offsets).end(), - size_t{0}); - thrust::scatter(handle.get_thrust_policy(), - vertex_counts.begin(), - vertex_counts.end(), - unique_label_indices.begin(), - (*renumber_map_label_offsets).begin() + 1); - - thrust::inclusive_scan(handle.get_thrust_policy(), - (*renumber_map_label_offsets).begin(), - (*renumber_map_label_offsets).end(), - (*renumber_map_label_offsets).begin()); - } + auto [renumber_map, renumber_map_label_offsets] = + compute_vertex_renumber_map( + handle, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_hops, + seed_vertices ? std::make_optional>((*seed_vertices).data(), + (*seed_vertices).size()) + : std::nullopt, + seed_vertex_label_offsets, + edgelist_label_offsets, + std::nullopt, + num_labels, + size_t{1}); - // 3. renumber input edges + // 2. 
renumber input edges if (edgelist_label_offsets) { rmm::device_uvector new_vertices(renumber_map.size(), handle.get_stream()); thrust::tabulate(handle.get_thrust_policy(), new_vertices.begin(), new_vertices.end(), - [label_indices = raft::device_span( - (*renumber_map_label_indices).data(), (*renumber_map_label_indices).size()), - renumber_map_label_offsets = raft::device_span( + [renumber_map_label_offsets = raft::device_span( (*renumber_map_label_offsets).data(), (*renumber_map_label_offsets).size())] __device__(size_t i) { - auto label_index = label_indices[i]; + auto label_index = static_cast(thrust::distance( + renumber_map_label_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + renumber_map_label_offsets.begin() + 1, + renumber_map_label_offsets.end(), + i))); auto label_start_offset = renumber_map_label_offsets[label_index]; return static_cast(i - label_start_offset); }); - (*renumber_map_label_indices).resize(0, handle.get_stream()); - (*renumber_map_label_indices).shrink_to_fit(handle.get_stream()); - rmm::device_uvector segment_sorted_renumber_map(renumber_map.size(), handle.get_stream()); rmm::device_uvector segment_sorted_new_vertices(new_vertices.size(), @@ -1192,7 +1813,7 @@ renumber_sampled_edgelist(raft::handle_t const& handle, rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto approx_edges_to_sort_per_iteration = + auto approx_items_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort @@ -1201,7 +1822,7 @@ renumber_sampled_edgelist(raft::handle_t const& handle, raft::device_span{(*renumber_map_label_offsets).data(), (*renumber_map_label_offsets).size()}, renumber_map.size(), - approx_edges_to_sort_per_iteration); + approx_items_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -1369,6 +1990,455 @@ renumber_sampled_edgelist(raft::handle_t const& handle, std::move(renumber_map_label_offsets)); } +// this function does not reorder edges (the i'th returned edge is the renumbered output of the +// i'th input edge) +template +std::tuple< + rmm::device_uvector, // edgelist_majors + rmm::device_uvector, // edgelist minors + std::optional>, // edgelist edge IDs + std::optional>, // seed_vertices, + rmm::device_uvector, // vertex renumber_map + rmm::device_uvector, // vertex renumber_map (label, vertex type) offsets + std::optional>, // edge ID renumber map + std::optional>> // edge ID renumber map (label, edge type) offsets +heterogeneous_renumber_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_majors, + rmm::device_uvector&& edgelist_minors, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional> edgelist_hops, + std::optional>&& seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + bool do_expensive_check) +{ + // 1. compute vertex renumber map + + auto [vertex_renumber_map, vertex_renumber_map_label_type_offsets] = + compute_vertex_renumber_map( + handle, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_hops, + seed_vertices ? 
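A host-side sketch of the tabulate lambda above: position i within the renumber map becomes a per-label new vertex ID by locating i's bucket in the offsets array and subtracting the bucket's start offset.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <vector>

int main() {
  std::vector<size_t> offsets{0, 3, 5};  // label 0 -> [0, 3), label 1 -> [3, 5)
  std::vector<int> new_ids(5);
  for (size_t i = 0; i < new_ids.size(); ++i) {
    auto label = std::distance(
      offsets.begin() + 1,
      std::upper_bound(offsets.begin() + 1, offsets.end(), i));
    new_ids[i] = static_cast<int>(i - offsets[label]);  // ID restarts at 0 per label
  }
  for (auto v : new_ids) { std::cout << v << ' '; }  // 0 1 2 0 1
  return 0;
}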
std::make_optional>((*seed_vertices).data(), + (*seed_vertices).size()) + : std::nullopt, + seed_vertex_label_offsets, + edgelist_label_offsets, + std::make_optional(vertex_type_offsets), + num_labels, + num_vertex_types); + assert(vertex_renumber_map_label_type_offsets.has_value()); + + // 2. compute edge renumber map + + std::optional> edge_id_renumber_map{std::nullopt}; + std::optional> edge_id_renumber_map_label_type_offsets{std::nullopt}; + if (edgelist_edge_ids) { + std::tie(edge_id_renumber_map, edge_id_renumber_map_label_type_offsets) = + compute_edge_id_renumber_map( + handle, + raft::device_span((*edgelist_edge_ids).data(), + (*edgelist_edge_ids).size()), + edgelist_edge_types, + edgelist_hops, + edgelist_label_offsets, + num_labels, + num_edge_types); + } + + auto approx_items_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for segmented sort + + // 3. renumber input edge source/destination vertices + + { + rmm::device_uvector new_vertices(vertex_renumber_map.size(), handle.get_stream()); + thrust::tabulate(handle.get_thrust_policy(), + new_vertices.begin(), + new_vertices.end(), + [renumber_map_label_type_offsets = raft::device_span( + (*vertex_renumber_map_label_type_offsets).data(), + (*vertex_renumber_map_label_type_offsets).size())] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + renumber_map_label_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + renumber_map_label_type_offsets.begin() + 1, + renumber_map_label_type_offsets.end(), + i))); + auto start_offset = renumber_map_label_type_offsets[idx]; + return static_cast(i - start_offset); + }); + + rmm::device_uvector segment_sorted_vertex_renumber_map(vertex_renumber_map.size(), + handle.get_stream()); + rmm::device_uvector segment_sorted_new_vertices(new_vertices.size(), + handle.get_stream()); + + rmm::device_uvector d_tmp_storage(0, handle.get_stream()); + + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, + raft::device_span{(*vertex_renumber_map_label_type_offsets).data(), + (*vertex_renumber_map_label_type_offsets).size()}, + vertex_renumber_map.size(), + approx_items_to_sort_per_iteration); + auto num_chunks = h_label_offsets.size() - 1; + + for (size_t i = 0; i < num_chunks; ++i) { + size_t tmp_storage_bytes{0}; + + auto offset_first = thrust::make_transform_iterator( + (*vertex_renumber_map_label_type_offsets).data() + h_label_offsets[i], + detail::shift_left_t{h_edge_offsets[i]}); + cub::DeviceSegmentedSort::SortPairs( + static_cast(nullptr), + tmp_storage_bytes, + vertex_renumber_map.begin() + h_edge_offsets[i], + segment_sorted_vertex_renumber_map.begin() + h_edge_offsets[i], + new_vertices.begin() + h_edge_offsets[i], + segment_sorted_new_vertices.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + + if (tmp_storage_bytes > d_tmp_storage.size()) { + d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); + } + + cub::DeviceSegmentedSort::SortPairs( + d_tmp_storage.data(), + tmp_storage_bytes, + vertex_renumber_map.begin() + h_edge_offsets[i], + segment_sorted_vertex_renumber_map.begin() + h_edge_offsets[i], + new_vertices.begin() + h_edge_offsets[i], + segment_sorted_new_vertices.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + 
offset_first, + offset_first + 1, + handle.get_stream()); + } + + new_vertices.resize(0, handle.get_stream()); + new_vertices.shrink_to_fit(handle.get_stream()); + + auto pair_first = + thrust::make_zip_iterator(edgelist_majors.begin(), thrust::make_counting_iterator(size_t{0})); + thrust::transform( + handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_majors.size(), + edgelist_majors.begin(), + [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), + vertex_type_offsets, + renumber_map_label_type_offsets = + raft::device_span((*vertex_renumber_map_label_type_offsets).data(), + (*vertex_renumber_map_label_type_offsets).size()), + old_vertices = raft::device_span(segment_sorted_vertex_renumber_map.data(), + segment_sorted_vertex_renumber_map.size()), + new_vertices = raft::device_span(segment_sorted_new_vertices.data(), + segment_sorted_new_vertices.size()), + num_vertex_types] __device__(auto pair) { + auto old_vertex = thrust::get<0>(pair); + label_index_t label_idx{0}; + if (edgelist_label_offsets) { + label_idx = static_cast( + thrust::distance((*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + thrust::get<1>(pair)))); + } + auto v_type = static_cast(thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, vertex_type_offsets.begin() + 1, vertex_type_offsets.end(), old_vertex))); + auto start_offset = renumber_map_label_type_offsets[label_idx * num_vertex_types + v_type]; + auto end_offset = + renumber_map_label_type_offsets[label_idx * num_vertex_types + v_type + 1]; + auto it = thrust::lower_bound(thrust::seq, + old_vertices.begin() + start_offset, + old_vertices.begin() + end_offset, + old_vertex); + assert(*it == old_vertex); + return *(new_vertices.begin() + thrust::distance(old_vertices.begin(), it)); + }); + + pair_first = + thrust::make_zip_iterator(edgelist_minors.begin(), thrust::make_counting_iterator(size_t{0})); + thrust::transform( + handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_minors.size(), + edgelist_minors.begin(), + [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), + vertex_type_offsets, + renumber_map_label_type_offsets = + raft::device_span((*vertex_renumber_map_label_type_offsets).data(), + (*vertex_renumber_map_label_type_offsets).size()), + old_vertices = raft::device_span(segment_sorted_vertex_renumber_map.data(), + segment_sorted_vertex_renumber_map.size()), + new_vertices = raft::device_span(segment_sorted_new_vertices.data(), + segment_sorted_new_vertices.size()), + num_vertex_types] __device__(auto pair) { + auto old_vertex = thrust::get<0>(pair); + label_index_t label_idx{0}; + if (edgelist_label_offsets) { + label_idx = static_cast( + thrust::distance((*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + thrust::get<1>(pair)))); + } + auto v_type = static_cast(thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, vertex_type_offsets.begin() + 1, vertex_type_offsets.end(), old_vertex))); + auto start_offset = renumber_map_label_type_offsets[label_idx * num_vertex_types + v_type]; + auto end_offset = + renumber_map_label_type_offsets[label_idx * num_vertex_types + v_type + 1]; + auto it = thrust::lower_bound(thrust::seq, + old_vertices.begin() + start_offset, + old_vertices.begin() + end_offset, + 
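A host-side sketch of the endpoint lookup in the transform above: each old vertex is binary-searched (lower_bound) only inside its own (label, vertex type) segment of the segment-sorted renumber map, and the hit's position indexes the parallel new-ID array.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> old_vertices{4, 9, 2, 7};  // sorted within segments [0, 2) and [2, 4)
  std::vector<int> new_ids{0, 1, 0, 1};       // parallel to old_vertices
  std::vector<size_t> seg_offsets{0, 2, 4};   // one segment per (label, vertex type)

  auto lookup = [&](int old_v, size_t segment) {
    auto first = old_vertices.begin() + seg_offsets[segment];
    auto last = old_vertices.begin() + seg_offsets[segment + 1];
    auto it = std::lower_bound(first, last, old_v);
    assert(it != last && *it == old_v);  // every endpoint must appear in the map
    return new_ids[it - old_vertices.begin()];
  };

  assert(lookup(9, 0) == 1);
  assert(lookup(2, 1) == 0);
  return 0;
}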
old_vertex); + assert(*it == old_vertex); + return *(new_vertices.begin() + thrust::distance(old_vertices.begin(), it)); + }); + + if (seed_vertices) { + pair_first = thrust::make_zip_iterator((*seed_vertices).begin(), + thrust::make_counting_iterator(size_t{0})); + thrust::transform( + handle.get_thrust_policy(), + pair_first, + pair_first + (*seed_vertices).size(), + (*seed_vertices).begin(), + [seed_vertex_label_offsets = detail::to_thrust_optional(seed_vertex_label_offsets), + vertex_type_offsets, + renumber_map_label_type_offsets = + raft::device_span((*vertex_renumber_map_label_type_offsets).data(), + (*vertex_renumber_map_label_type_offsets).size()), + old_vertices = raft::device_span( + segment_sorted_vertex_renumber_map.data(), segment_sorted_vertex_renumber_map.size()), + new_vertices = raft::device_span(segment_sorted_new_vertices.data(), + segment_sorted_new_vertices.size()), + num_vertex_types] __device__(auto pair) { + auto old_vertex = thrust::get<0>(pair); + label_index_t label_idx{0}; + if (seed_vertex_label_offsets) { + label_idx = static_cast( + thrust::distance((*seed_vertex_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*seed_vertex_label_offsets).begin() + 1, + (*seed_vertex_label_offsets).end(), + thrust::get<1>(pair)))); + } + auto v_type = static_cast( + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + old_vertex))); + auto start_offset = + renumber_map_label_type_offsets[label_idx * num_vertex_types + v_type]; + auto end_offset = + renumber_map_label_type_offsets[label_idx * num_vertex_types + v_type + 1]; + auto it = thrust::lower_bound(thrust::seq, + old_vertices.begin() + start_offset, + old_vertices.begin() + end_offset, + old_vertex); + assert(*it == old_vertex); + return new_vertices[thrust::distance(old_vertices.begin(), it)]; + }); + } + } + + // 4. 
renumber input edge IDs + + if (edgelist_edge_ids) { + rmm::device_uvector new_edge_ids((*edge_id_renumber_map).size(), + handle.get_stream()); + if (edge_id_renumber_map_label_type_offsets) { + thrust::tabulate(handle.get_thrust_policy(), + new_edge_ids.begin(), + new_edge_ids.end(), + [renumber_map_label_type_offsets = raft::device_span( + (*edge_id_renumber_map_label_type_offsets).data(), + (*edge_id_renumber_map_label_type_offsets).size())] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + renumber_map_label_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + renumber_map_label_type_offsets.begin() + 1, + renumber_map_label_type_offsets.end(), + i))); + auto start_offset = renumber_map_label_type_offsets[idx]; + return static_cast(i - start_offset); + }); + } else { + thrust::sequence( + handle.get_thrust_policy(), new_edge_ids.begin(), new_edge_ids.end(), edge_id_t{0}); + } + + rmm::device_uvector segment_sorted_edge_id_renumber_map( + (*edge_id_renumber_map).size(), handle.get_stream()); + rmm::device_uvector segment_sorted_new_edge_ids(new_edge_ids.size(), + handle.get_stream()); + + if (edge_id_renumber_map_label_type_offsets) { + rmm::device_uvector d_tmp_storage(0, handle.get_stream()); + + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, + raft::device_span{(*edge_id_renumber_map_label_type_offsets).data(), + (*edge_id_renumber_map_label_type_offsets).size()}, + (*edge_id_renumber_map).size(), + approx_items_to_sort_per_iteration); + auto num_chunks = h_label_offsets.size() - 1; + + for (size_t i = 0; i < num_chunks; ++i) { + size_t tmp_storage_bytes{0}; + + auto offset_first = thrust::make_transform_iterator( + (*edge_id_renumber_map_label_type_offsets).data() + h_label_offsets[i], + detail::shift_left_t{h_edge_offsets[i]}); + cub::DeviceSegmentedSort::SortPairs( + static_cast(nullptr), + tmp_storage_bytes, + (*edge_id_renumber_map).begin() + h_edge_offsets[i], + segment_sorted_edge_id_renumber_map.begin() + h_edge_offsets[i], + new_edge_ids.begin() + h_edge_offsets[i], + segment_sorted_new_edge_ids.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + + if (tmp_storage_bytes > d_tmp_storage.size()) { + d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); + } + + cub::DeviceSegmentedSort::SortPairs( + d_tmp_storage.data(), + tmp_storage_bytes, + (*edge_id_renumber_map).begin() + h_edge_offsets[i], + segment_sorted_edge_id_renumber_map.begin() + h_edge_offsets[i], + new_edge_ids.begin() + h_edge_offsets[i], + segment_sorted_new_edge_ids.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + } + + new_edge_ids.resize(0, handle.get_stream()); + new_edge_ids.shrink_to_fit(handle.get_stream()); + } else { + thrust::copy(handle.get_thrust_policy(), + (*edge_id_renumber_map).begin(), + (*edge_id_renumber_map).end(), + segment_sorted_edge_id_renumber_map.begin()); + segment_sorted_new_edge_ids = std::move(new_edge_ids); + thrust::sort_by_key(handle.get_thrust_policy(), + segment_sorted_edge_id_renumber_map.begin(), + segment_sorted_edge_id_renumber_map.end(), + segment_sorted_new_edge_ids.begin()); + } + + if (edge_id_renumber_map_label_type_offsets) { + auto pair_first = thrust::make_zip_iterator((*edgelist_edge_ids).begin(), + 
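A host-side sketch of the no-segment fallback in this step: new edge IDs are assigned by position (thrust::sequence above), then the (old ID, new ID) pairs are sorted by old ID so later lookups can binary-search old-to-new, which is what the sort_by_key accomplishes.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

int main() {
  using edge_id_t = int64_t;
  std::vector<edge_id_t> renumber_map{42, 7, 19};  // dedup'ed old edge IDs

  // new ID == position in renumber_map
  std::vector<std::pair<edge_id_t, edge_id_t>> old_new;
  for (edge_id_t i = 0; i < static_cast<edge_id_t>(renumber_map.size()); ++i) {
    old_new.emplace_back(renumber_map[i], i);
  }
  std::sort(old_new.begin(), old_new.end());  // order by old ID for binary search

  assert(old_new[0] == std::make_pair(edge_id_t{7}, edge_id_t{1}));
  assert(old_new[2] == std::make_pair(edge_id_t{42}, edge_id_t{0}));
  return 0;
}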
thrust::make_counting_iterator(size_t{0})); + thrust::transform( + handle.get_thrust_policy(), + pair_first, + pair_first + (*edgelist_edge_ids).size(), + (*edgelist_edge_ids).begin(), + cuda::proclaim_return_type( + [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), + edge_types = edgelist_edge_types + ? thrust::make_optional>( + (*edgelist_edge_types).data(), (*edgelist_edge_types).size()) + : thrust::nullopt, + renumber_map = + raft::device_span(segment_sorted_edge_id_renumber_map.data(), + segment_sorted_edge_id_renumber_map.size()), + new_edge_ids = raft::device_span(segment_sorted_new_edge_ids.data(), + segment_sorted_new_edge_ids.size()), + renumber_map_label_type_offsets = + raft::device_span((*edge_id_renumber_map_label_type_offsets).data(), + (*edge_id_renumber_map_label_type_offsets).size()), + num_edge_types] __device__(auto pair) { + auto old_edge_id = thrust::get<0>(pair); + auto edge_idx = thrust::get<1>(pair); + size_t label_idx{0}; + if (edgelist_label_offsets) { + label_idx = static_cast( + thrust::distance((*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + edge_idx))); + } + edge_type_t edge_type{0}; + if (edge_types) { edge_type = (*edge_types)[edge_idx]; } + auto renumber_map_start_offset = + renumber_map_label_type_offsets[label_idx * num_edge_types + edge_type]; + auto renumber_map_end_offset = + renumber_map_label_type_offsets[label_idx * num_edge_types + edge_type + 1]; + auto it = thrust::lower_bound(thrust::seq, + renumber_map.begin() + renumber_map_start_offset, + renumber_map.begin() + renumber_map_end_offset, + old_edge_id); + assert(*it == old_edge_id); + return *(new_edge_ids.begin() + thrust::distance(renumber_map.begin(), it)); + })); + } else { + thrust::transform( + handle.get_thrust_policy(), + (*edgelist_edge_ids).begin(), + (*edgelist_edge_ids).end(), + (*edgelist_edge_ids).begin(), + cuda::proclaim_return_type( + [renumber_map = + raft::device_span(segment_sorted_edge_id_renumber_map.data(), + segment_sorted_edge_id_renumber_map.size()), + new_edge_ids = raft::device_span( + segment_sorted_new_edge_ids.data(), + segment_sorted_new_edge_ids.size())] __device__(edge_id_t old_edge_id) { + auto it = thrust::lower_bound( + thrust::seq, renumber_map.begin(), renumber_map.end(), old_edge_id); + assert(*it == old_edge_id); + return *(new_edge_ids.begin() + thrust::distance(renumber_map.begin(), it)); + })); + } + } + + return std::make_tuple(std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_edge_ids), + std::move(seed_vertices), + std::move(vertex_renumber_map), + std::move(*vertex_renumber_map_label_type_offsets), + std::move(edge_id_renumber_map), + std::move(edge_id_renumber_map_label_type_offsets)); +} + template void permute_array(raft::handle_t const& handle, IndexIterator index_first, @@ -1390,7 +2460,9 @@ void permute_array(raft::handle_t const& handle, value_first); } -// key: ((label), (hop), major, minor) +// key: +// ((label), (edge type), (hop), major, minor) if use_edge_type_as_sort_key is true +// ((label), (hop), major, minor) if use_edge_type_as_sort_key is false template std::tuple, rmm::device_uvector, @@ -1405,7 +2477,8 @@ sort_sampled_edge_tuples(raft::handle_t const& handle, std::optional>&& edgelist_edge_ids, std::optional>&& edgelist_edge_types, std::optional>&& edgelist_hops, - std::optional> edgelist_label_offsets) + std::optional> edgelist_label_offsets, + bool 
use_edge_type_as_sort_key) { std::vector h_label_offsets{}; std::vector h_edge_offsets{}; @@ -1427,11 +2500,15 @@ sort_sampled_edge_tuples(raft::handle_t const& handle, rmm::device_uvector indices(h_edge_offsets[i + 1] - h_edge_offsets[i], handle.get_stream()); thrust::sequence(handle.get_thrust_policy(), indices.begin(), indices.end(), size_t{0}); - edge_order_t edge_order_comp{ + edge_order_t edge_order_comp{ edgelist_label_offsets ? thrust::make_optional>( (*edgelist_label_offsets).data() + h_label_offsets[i], (h_label_offsets[i + 1] - h_label_offsets[i]) + 1) : thrust::nullopt, + edgelist_edge_types && use_edge_type_as_sort_key + ? thrust::make_optional>( + (*edgelist_edge_types).data() + h_edge_offsets[i], indices.size()) + : thrust::nullopt, edgelist_hops ? thrust::make_optional>( (*edgelist_hops).data() + h_edge_offsets[i], indices.size()) : thrust::nullopt, @@ -1510,25 +2587,29 @@ renumber_and_compress_sampled_edgelist( bool do_expensive_check) { using label_index_t = uint32_t; + using vertex_type_t = uint32_t; // dummy auto edgelist_majors = src_is_major ? std::move(edgelist_srcs) : std::move(edgelist_dsts); auto edgelist_minors = src_is_major ? std::move(edgelist_dsts) : std::move(edgelist_srcs); // 1. check input arguments - check_input_edges(handle, - edgelist_majors, - edgelist_minors, - edgelist_weights, - edgelist_edge_ids, - edgelist_edge_types, - edgelist_hops, - seed_vertices, - seed_vertex_label_offsets, - edgelist_label_offsets, - num_labels, - num_hops, - do_expensive_check); + check_input_edges(handle, + edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + seed_vertices, + seed_vertex_label_offsets, + edgelist_label_offsets, + std::nullopt, + num_labels, + num_hops, + size_t{1}, + std::optional{std::nullopt}, + do_expensive_check); CUGRAPH_EXPECTS( !doubly_compress || !compress_per_hop, @@ -1582,7 +2663,8 @@ renumber_and_compress_sampled_edgelist( std::move(edgelist_edge_ids), std::move(edgelist_edge_types), std::move(edgelist_hops), - edgelist_label_offsets); + edgelist_label_offsets, + false); if (renumbered_seed_vertices) { if (seed_vertex_label_offsets) { @@ -2144,25 +3226,29 @@ renumber_and_sort_sampled_edgelist( bool do_expensive_check) { using label_index_t = uint32_t; + using vertex_type_t = uint32_t; // dummy auto edgelist_majors = src_is_major ? std::move(edgelist_srcs) : std::move(edgelist_dsts); auto edgelist_minors = src_is_major ? std::move(edgelist_dsts) : std::move(edgelist_srcs); // 1. check input arguments - check_input_edges(handle, - edgelist_majors, - edgelist_minors, - edgelist_weights, - edgelist_edge_ids, - edgelist_edge_types, - edgelist_hops, - seed_vertices, - seed_vertex_label_offsets, - edgelist_label_offsets, - num_labels, - num_hops, - do_expensive_check); + check_input_edges(handle, + edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + seed_vertices, + seed_vertex_label_offsets, + edgelist_label_offsets, + std::nullopt, + num_labels, + num_hops, + size_t{1}, + std::optional{std::nullopt}, + do_expensive_check); // 2. renumber @@ -2206,7 +3292,8 @@ renumber_and_sort_sampled_edgelist( std::move(edgelist_edge_ids), std::move(edgelist_edge_types), std::move(edgelist_hops), - edgelist_label_offsets); + edgelist_label_offsets, + false); // 4. 
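sort_sampled_edge_tuples is an argsort: it sorts an index vector with a composite comparator (edge_order_t, optionally including the edge type when use_edge_type_as_sort_key is set) and then permutes each attribute column once, rather than repeatedly sorting wide zipped tuples. A stripped-down two-key version of the idiom (hypothetical columns; the device lambda requires nvcc's extended-lambda mode, which cugraph builds enable):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/gather.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <cstdint>

void sort_edges_by_hop_then_major(thrust::device_vector<int32_t>& hops,
                                  thrust::device_vector<int64_t>& majors)
{
  thrust::device_vector<size_t> indices(hops.size());
  thrust::sequence(thrust::device, indices.begin(), indices.end(), size_t{0});

  auto key_first = thrust::make_zip_iterator(hops.begin(), majors.begin());
  // Sort indices only; zipped tuples compare lexicographically, i.e. (hop, major).
  thrust::sort(thrust::device, indices.begin(), indices.end(),
               [key_first] __device__(size_t l, size_t r) {
                 return *(key_first + l) < *(key_first + r);
               });

  // Apply the resulting permutation to each attribute column.
  thrust::device_vector<int32_t> tmp_hops(hops.size());
  thrust::device_vector<int64_t> tmp_majors(majors.size());
  thrust::gather(thrust::device, indices.begin(), indices.end(), hops.begin(), tmp_hops.begin());
  thrust::gather(thrust::device, indices.begin(), indices.end(), majors.begin(), tmp_majors.begin());
  hops.swap(tmp_hops);
  majors.swap(tmp_majors);
}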
compute edgelist_label_hop_offsets @@ -2274,6 +3361,218 @@ renumber_and_sort_sampled_edgelist( std::move(renumber_map_label_offsets)); } +template +std::tuple, // srcs + rmm::device_uvector, // dsts + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // (label, edge type, hop) offsets to the + // edges + rmm::device_uvector, // vertex renumber map + rmm::device_uvector, // (label, vertex type) offsets to the vertex renumber map + std::optional>, // edge ID renumber map + std::optional< + rmm::device_uvector>> // (label, edge type) offsets to the vertex renumber map +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check) +{ + using label_index_t = uint32_t; + using vertex_type_t = uint32_t; + + auto edgelist_majors = src_is_major ? std::move(edgelist_srcs) : std::move(edgelist_dsts); + auto edgelist_minors = src_is_major ? std::move(edgelist_dsts) : std::move(edgelist_srcs); + + // 1. check input arguments + + check_input_edges(handle, + edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + seed_vertices, + seed_vertex_label_offsets, + edgelist_label_offsets, + vertex_type_offsets, + num_labels, + num_hops, + num_vertex_types, + std::optional{num_edge_types}, + do_expensive_check); + + // 2. renumber + + std::optional> renumbered_seed_vertices{std::nullopt}; + if (seed_vertices) { + renumbered_seed_vertices = + rmm::device_uvector((*seed_vertices).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*seed_vertices).begin(), + (*seed_vertices).end(), + (*renumbered_seed_vertices).begin()); + } + rmm::device_uvector vertex_renumber_map(0, handle.get_stream()); + rmm::device_uvector vertex_renumber_map_label_type_offsets(0, handle.get_stream()); + std::optional> edge_id_renumber_map{std::nullopt}; + std::optional> edge_id_renumber_map_label_type_offsets{std::nullopt}; + std::tie(edgelist_majors, + edgelist_minors, + edgelist_edge_ids, + std::ignore, + vertex_renumber_map, + vertex_renumber_map_label_type_offsets, + edge_id_renumber_map, + edge_id_renumber_map_label_type_offsets) = + heterogeneous_renumber_sampled_edgelist( + handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_edge_ids), + edgelist_edge_types ? std::make_optional(raft::device_span( + (*edgelist_edge_types).data(), (*edgelist_edge_types).size())) + : std::nullopt, + edgelist_hops ? std::make_optional(raft::device_span((*edgelist_hops).data(), + (*edgelist_hops).size())) + : std::nullopt, + std::move(renumbered_seed_vertices), + seed_vertex_label_offsets, + edgelist_label_offsets, + vertex_type_offsets, + num_labels, + num_vertex_types, + num_edge_types, + do_expensive_check); + + // 3. 
sort by ((label), (edge type), (hop), major, minor) + + std::tie(edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops) = sort_sampled_edge_tuples(handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_hops), + edgelist_label_offsets, + true); + + // 4. compute edgelist (label, edge type, hop) offsets + + std::optional> edgelist_label_type_hop_offsets{std::nullopt}; + if (edgelist_label_offsets || edgelist_edge_types || edgelist_hops) { + edgelist_label_type_hop_offsets = + rmm::device_uvector(num_labels * num_edge_types * num_hops + 1, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + (*edgelist_label_type_hop_offsets).begin(), + (*edgelist_label_type_hop_offsets).end(), + size_t{0}); + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_labels * num_edge_types * num_hops), + (*edgelist_label_type_hop_offsets).begin(), + cuda::proclaim_return_type( + [edgelist_label_offsets = detail::to_thrust_optional(edgelist_label_offsets), + edgelist_edge_types = edgelist_edge_types + ? thrust::make_optional>( + (*edgelist_edge_types).data(), (*edgelist_edge_types).size()) + : thrust::nullopt, + edgelist_hops = edgelist_hops ? thrust::make_optional>( + (*edgelist_hops).data(), (*edgelist_hops).size()) + : thrust::nullopt, + num_edge_types, + num_hops, + num_edges = edgelist_majors.size()] __device__(size_t i) { + size_t start_offset{0}; + auto end_offset = num_edges; + + if (edgelist_label_offsets) { + auto l_idx = static_cast(i / (num_edge_types * num_hops)); + start_offset = (*edgelist_label_offsets)[l_idx]; + end_offset = (*edgelist_label_offsets)[l_idx + 1]; + } + + if (edgelist_edge_types) { + auto t = static_cast((i % (num_edge_types * num_hops)) / num_hops); + auto lower_it = thrust::lower_bound(thrust::seq, + (*edgelist_edge_types).begin() + start_offset, + (*edgelist_edge_types).begin() + end_offset, + t); + auto upper_it = thrust::upper_bound(thrust::seq, + (*edgelist_edge_types).begin() + start_offset, + (*edgelist_edge_types).begin() + end_offset, + t); + start_offset = + static_cast(thrust::distance((*edgelist_edge_types).begin(), lower_it)); + end_offset = + static_cast(thrust::distance((*edgelist_edge_types).begin(), upper_it)); + } + + if (edgelist_hops) { + auto h = static_cast(i % num_hops); + auto lower_it = thrust::lower_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + auto upper_it = thrust::upper_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + start_offset = + static_cast(thrust::distance((*edgelist_hops).begin(), lower_it)); + end_offset = static_cast(thrust::distance((*edgelist_hops).begin(), upper_it)); + } + + return end_offset - start_offset; + })); + thrust::exclusive_scan(handle.get_thrust_policy(), + (*edgelist_label_type_hop_offsets).begin(), + (*edgelist_label_type_hop_offsets).end(), + (*edgelist_label_type_hop_offsets).begin()); + } + + edgelist_edge_types = std::nullopt; + edgelist_hops = std::nullopt; + + return std::make_tuple(std::move(src_is_major ? edgelist_majors : edgelist_minors), + std::move(src_is_major ? 
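The (label, edge type, hop) offsets above are built with a count-then-scan idiom: one thread per bucket counts its elements by binary search over the already-sorted columns, and a single exclusive scan converts counts into offsets, with the trailing zero becoming the grand total. The idiom in isolation, over one sorted key column (hypothetical names; extended-lambda mode assumed):

#include <thrust/binary_search.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/scan.h>
#include <thrust/transform.h>
#include <cstdint>

// bucket_ids is sorted; returns num_buckets + 1 offsets such that bucket b
// occupies [offsets[b], offsets[b + 1]).
thrust::device_vector<size_t> compute_offsets(
  thrust::device_vector<int32_t> const& bucket_ids, size_t num_buckets)
{
  thrust::device_vector<size_t> offsets(num_buckets + 1, size_t{0});
  int32_t const* ids = thrust::raw_pointer_cast(bucket_ids.data());
  size_t n           = bucket_ids.size();
  thrust::transform(thrust::device,
                    thrust::make_counting_iterator(size_t{0}),
                    thrust::make_counting_iterator(num_buckets),
                    offsets.begin(),
                    [ids, n] __device__(size_t b) {
                      // count of elements equal to b in the sorted array
                      auto first = thrust::lower_bound(thrust::seq, ids, ids + n,
                                                       static_cast<int32_t>(b));
                      auto last  = thrust::upper_bound(thrust::seq, ids, ids + n,
                                                       static_cast<int32_t>(b));
                      return static_cast<size_t>(thrust::distance(first, last));
                    });
  // counts -> offsets; the untouched final zero becomes the total element count.
  thrust::exclusive_scan(thrust::device, offsets.begin(), offsets.end(), offsets.begin());
  return offsets;
}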
edgelist_minors : edgelist_majors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_label_type_hop_offsets), + std::move(vertex_renumber_map), + std::move(vertex_renumber_map_label_type_offsets), + std::move(edge_id_renumber_map), + std::move(edge_id_renumber_map_label_type_offsets)); +} + template (handle, - edgelist_majors, - edgelist_minors, - edgelist_weights, - edgelist_edge_ids, - edgelist_edge_types, - edgelist_hops, - std::nullopt, - std::nullopt, - edgelist_label_offsets, - num_labels, - num_hops, - do_expensive_check); + check_input_edges(handle, + edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + std::nullopt, + std::nullopt, + edgelist_label_offsets, + std::nullopt, + num_labels, + num_hops, + size_t{1}, + std::optional{std::nullopt}, + do_expensive_check); // 2. sort by ((l), (h), major, minor) @@ -2332,7 +3635,8 @@ sort_sampled_edgelist(raft::handle_t const& handle, std::move(edgelist_edge_ids), std::move(edgelist_edge_types), std::move(edgelist_hops), - edgelist_label_offsets); + edgelist_label_offsets, + false); // 3. compute edgelist_label_hop_offsets diff --git a/cpp/src/sampling/sampling_post_processing_sg_v32_e32.cu b/cpp/src/sampling/sampling_post_processing_sg_v32_e32.cu index 6b8d8a07d92..ff1add6a02a 100644 --- a/cpp/src/sampling/sampling_post_processing_sg_v32_e32.cu +++ b/cpp/src/sampling/sampling_post_processing_sg_v32_e32.cu @@ -122,6 +122,62 @@ renumber_and_sort_sampled_edgelist( bool src_is_major, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>> +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>> +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check); + template std::tuple, rmm::device_uvector, std::optional>, diff --git a/cpp/src/sampling/sampling_post_processing_sg_v32_e64.cu b/cpp/src/sampling/sampling_post_processing_sg_v32_e64.cu index a4b083efd7c..7001dcfdaf3 100644 --- a/cpp/src/sampling/sampling_post_processing_sg_v32_e64.cu +++ b/cpp/src/sampling/sampling_post_processing_sg_v32_e64.cu @@ -122,6 +122,62 @@ 
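The sampling_post_processing_sg_v*_e*.cu files below contain no logic of their own; they are explicit instantiation units. The generic definitions live in the shared implementation header, and each translation unit stamps out one (vertex_t, edge_t, weight_t) combination so the library exports concrete, linkable symbols for every supported type mix. The mechanism reduced to a toy example (hypothetical function, not part of cugraph):

#include <cstdint>

// algo_impl.cuh -- the generic definition, included only by instantiation TUs.
template <typename vertex_t>
vertex_t add_one(vertex_t v) { return v + 1; }

// algo_v32.cu -- compiles the int32_t flavor into the library.
template int32_t add_one<int32_t>(int32_t);

// algo_v64.cu -- compiles the int64_t flavor into the library.
template int64_t add_one<int64_t>(int64_t);

// Callers see only a declaration plus extern-template suppressions, and link
// against the prebuilt instantiations:
extern template int32_t add_one<int32_t>(int32_t);
extern template int64_t add_one<int64_t>(int64_t);

This keeps compile times bounded: the heavy template body is compiled once per type combination rather than once per including translation unit.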
renumber_and_sort_sampled_edgelist( bool src_is_major, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>> +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>> +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check); + template std::tuple, rmm::device_uvector, std::optional>, diff --git a/cpp/src/sampling/sampling_post_processing_sg_v64_e64.cu b/cpp/src/sampling/sampling_post_processing_sg_v64_e64.cu index a62ca2a0777..3b2b8144420 100644 --- a/cpp/src/sampling/sampling_post_processing_sg_v64_e64.cu +++ b/cpp/src/sampling/sampling_post_processing_sg_v64_e64.cu @@ -122,6 +122,62 @@ renumber_and_sort_sampled_edgelist( bool src_is_major, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>> +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>> +heterogeneous_renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional>&& edgelist_hops, + std::optional> seed_vertices, + std::optional> seed_vertex_label_offsets, + std::optional> edgelist_label_offsets, + raft::device_span vertex_type_offsets, 
+ size_t num_labels, + size_t num_hops, + size_t num_vertex_types, + size_t num_edge_types, + bool src_is_major, + bool do_expensive_check); + template std::tuple, rmm::device_uvector, std::optional>, diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 5371d53bcf0..f925a142737 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -803,7 +803,7 @@ graph_view_t edge_srcs, raft::device_span edge_dsts, - bool do_expensive_check) + bool do_expensive_check) const { CUGRAPH_EXPECTS( edge_srcs.size() == edge_dsts.size(), @@ -883,7 +883,7 @@ graph_view_t edge_srcs, raft::device_span edge_dsts, - bool do_expensive_check) + bool do_expensive_check) const { CUGRAPH_EXPECTS( edge_srcs.size() == edge_dsts.size(), diff --git a/cpp/src/structure/legacy/graph.cu b/cpp/src/structure/legacy/graph.cu index 7e1238e1558..a504125080b 100644 --- a/cpp/src/structure/legacy/graph.cu +++ b/cpp/src/structure/legacy/graph.cu @@ -14,17 +14,18 @@ * limitations under the License. */ -#include "utilities/graph_utils.cuh" - #include #include +#include #include #include +#include #include #include +#include namespace { @@ -69,15 +70,40 @@ namespace legacy { template void GraphViewBase::get_vertex_identifiers(VT* identifiers) const { - cugraph::detail::sequence(number_of_vertices, identifiers); + thrust::sequence(thrust::device, + thrust::device_pointer_cast(identifiers), + thrust::device_pointer_cast(identifiers + number_of_vertices), + VT{0}); + RAFT_CHECK_CUDA(nullptr); } +// FIXME: Need to get rid of this function... still used in python template void GraphCompressedSparseBaseView::get_source_indices(VT* src_indices) const { CUGRAPH_EXPECTS(offsets != nullptr, "No graph specified"); - cugraph::detail::offsets_to_indices( - offsets, GraphViewBase::number_of_vertices, src_indices); + rmm::cuda_stream_view stream_view; + + raft::device_span indices_span(src_indices, GraphViewBase::number_of_edges); + + if (indices_span.size() > 0) { + thrust::fill(rmm::exec_policy(stream_view), indices_span.begin(), indices_span.end(), VT{0}); + + thrust::for_each(rmm::exec_policy(stream_view), + offsets + 1, + offsets + GraphViewBase::number_of_vertices, + [indices_span] __device__(ET offset) { + if (offset < static_cast(indices_span.size())) { + cuda::atomic_ref atomic_counter( + indices_span.data()[offset]); + atomic_counter.fetch_add(VT{1}, cuda::std::memory_order_relaxed); + } + }); + thrust::inclusive_scan(rmm::exec_policy(stream_view), + indices_span.begin(), + indices_span.end(), + indices_span.begin()); + } } template @@ -152,6 +178,4 @@ void GraphCompressedSparseBaseView::degree(ET* degree, DegreeDirecti } // namespace legacy } // namespace cugraph -#include "utilities/eidir_graph_utils.hpp" - #include diff --git a/cpp/src/structure/select_random_vertices_impl.hpp b/cpp/src/structure/select_random_vertices_impl.hpp index e6857a5beda..d7502b3f6da 100644 --- a/cpp/src/structure/select_random_vertices_impl.hpp +++ b/cpp/src/structure/select_random_vertices_impl.hpp @@ -16,6 +16,7 @@ #pragma once #include "detail/graph_partition_utils.cuh" +#include "from_cugraph_ops/sampling.hpp" #include #include @@ -30,10 +31,6 @@ #include #include -#ifndef NO_CUGRAPH_OPS -#include -#endif - #include #include #include diff --git a/cpp/src/traversal/extract_bfs_paths_impl.cuh b/cpp/src/traversal/extract_bfs_paths_impl.cuh index 3790c0057cb..40030e2e39c 100644 --- a/cpp/src/traversal/extract_bfs_paths_impl.cuh +++ b/cpp/src/traversal/extract_bfs_paths_impl.cuh @@ 
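The rewritten get_source_indices above expands CSR offsets into per-edge source indices with three primitives: zero the output, bump the slot at every interior row start, and inclusive-scan. The atomic increment matters because consecutive empty rows share the same start offset, so several increments can land on one slot. A compact Thrust-only restatement (hypothetical driver; it uses raw atomicAdd where the code above uses cuda::atomic_ref):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/fill.h>
#include <thrust/for_each.h>
#include <thrust/scan.h>
#include <cstdint>

// offsets has num_rows + 1 entries; writes the row index of each of the
// offsets[num_rows] edges into `indices` (already sized to the edge count).
void offsets_to_indices(thrust::device_vector<int64_t> const& offsets,
                        thrust::device_vector<int32_t>& indices)
{
  thrust::fill(thrust::device, indices.begin(), indices.end(), int32_t{0});
  int32_t* ind     = thrust::raw_pointer_cast(indices.data());
  size_t num_edges = indices.size();
  // Mark interior row starts: row r (r >= 1) begins at offsets[r].
  thrust::for_each(thrust::device,
                   offsets.begin() + 1, offsets.end() - 1,
                   [ind, num_edges] __device__(int64_t offset) {
                     if (offset < static_cast<int64_t>(num_edges)) {
                       // Atomic, not plain increment: empty rows can repeat an offset.
                       atomicAdd(ind + offset, 1);
                     }
                   });
  // The prefix sum turns boundary markers into row indices for every edge.
  thrust::inclusive_scan(thrust::device, indices.begin(), indices.end(), indices.begin());
}

After the scan, edges in row 0 hold 0, and each marked boundary raises the running value by the number of rows that start there, so every edge ends up labeled with its source vertex.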
-17,7 +17,6 @@ #include "detail/graph_partition_utils.cuh" #include "utilities/collect_comm.cuh" -#include "utilities/graph_utils.cuh" #include #include diff --git a/cpp/src/utilities/eidecl_graph_utils.hpp b/cpp/src/utilities/eidecl_graph_utils.hpp deleted file mode 100644 index abf026cbbfe..00000000000 --- a/cpp/src/utilities/eidecl_graph_utils.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -namespace cugraph { -namespace detail { - -extern template __device__ float parallel_prefix_sum(int32_t, int32_t const*, float const*); -extern template __device__ double parallel_prefix_sum(int32_t, int32_t const*, double const*); -extern template __device__ float parallel_prefix_sum(int64_t, int32_t const*, float const*); -extern template __device__ double parallel_prefix_sum(int64_t, int32_t const*, double const*); -extern template __device__ float parallel_prefix_sum(int64_t, int64_t const*, float const*); -extern template __device__ double parallel_prefix_sum(int64_t, int64_t const*, double const*); - -extern template void offsets_to_indices(int const*, int, int*); -extern template void offsets_to_indices(long const*, int, int*); -extern template void offsets_to_indices(long const*, long, long*); - -extern template __attribute__((visibility("hidden"))) __global__ void -offsets_to_indices_kernel(int const*, int, int*); -extern template __attribute__((visibility("hidden"))) __global__ void -offsets_to_indices_kernel(long const*, int, int*); -extern template __attribute__((visibility("hidden"))) __global__ void -offsets_to_indices_kernel(long const*, long, long*); - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/utilities/eidir_graph_utils.hpp b/cpp/src/utilities/eidir_graph_utils.hpp deleted file mode 100644 index ba06c6f56ea..00000000000 --- a/cpp/src/utilities/eidir_graph_utils.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -namespace cugraph { -namespace detail { - -template __device__ float parallel_prefix_sum(int32_t, int32_t const*, float const*); -template __device__ double parallel_prefix_sum(int32_t, int32_t const*, double const*); -template __device__ float parallel_prefix_sum(int64_t, int32_t const*, float const*); -template __device__ double parallel_prefix_sum(int64_t, int32_t const*, double const*); -template __device__ float parallel_prefix_sum(int64_t, int64_t const*, float const*); -template __device__ double parallel_prefix_sum(int64_t, int64_t const*, double const*); - -template void offsets_to_indices(int32_t const*, int32_t, int32_t*); -template void offsets_to_indices(int64_t const*, int32_t, int32_t*); -template void offsets_to_indices(int64_t const*, int64_t, int64_t*); - -template __global__ __attribute__((visibility("hidden"))) void -offsets_to_indices_kernel(int32_t const*, int32_t, int32_t*); -template __global__ __attribute__((visibility("hidden"))) void -offsets_to_indices_kernel(int64_t const*, int32_t, int32_t*); -template __global__ __attribute__((visibility("hidden"))) void -offsets_to_indices_kernel(int64_t const*, int64_t, int64_t*); - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh deleted file mode 100644 index 0b257e7abde..00000000000 --- a/cpp/src/utilities/graph_utils.cuh +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - * - */ - -// Interanl helper functions -// Author: Alex Fender afender@nvidia.com -#pragma once - -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace cugraph { -namespace detail { - -// #define DEBUG 1 -#define CUDA_MAX_BLOCKS 65535 -#define CUDA_MAX_KERNEL_THREADS 256 // kernel will launch at most 256 threads per block -#define US - -template -__inline__ __device__ value_t parallel_prefix_sum(count_t n, index_t const* ind, value_t const* w) -{ - count_t i, j, mn; - value_t v, last; - value_t sum = 0.0; - bool valid; - - // Parallel prefix sum (using __shfl) - mn = (((n + blockDim.x - 1) / blockDim.x) * blockDim.x); // n in multiple of blockDim.x - for (i = threadIdx.x; i < mn; i += blockDim.x) { - // All threads (especially the last one) must always participate - // in the shfl instruction, otherwise their sum will be undefined. - // So, the loop stopping condition is based on multiple of n in loop increments, - // so that all threads enter into the loop and inside we make sure we do not - // read out of bounds memory checking for the actual size n. - - // check if the thread is valid - valid = i < n; - - // Notice that the last thread is used to propagate the prefix sum. - // For all the threads, in the first iteration the last is 0, in the following - // iterations it is the value at the last thread of the previous iterations. 
- - // get the value of the last thread - last = __shfl_sync(raft::warp_full_mask(), sum, blockDim.x - 1, blockDim.x); - - // if you are valid read the value from memory, otherwise set your value to 0 - sum = (valid) ? w[ind[i]] : 0.0; - - // do prefix sum (of size warpSize=blockDim.x =< 32) - for (j = 1; j < blockDim.x; j *= 2) { - v = __shfl_up_sync(raft::warp_full_mask(), sum, j, blockDim.x); - if (threadIdx.x >= j) sum += v; - } - // shift by last - sum += last; - // notice that no __threadfence or __syncthreads are needed in this implementation - } - // get the value of the last thread (to all threads) - last = __shfl_sync(raft::warp_full_mask(), sum, blockDim.x - 1, blockDim.x); - - return last; -} - -// axpy -template -struct axpy_functor : public thrust::binary_function { - const T a; - axpy_functor(T _a) : a(_a) {} - __host__ __device__ T operator()(const T& x, const T& y) const { return a * x + y; } -}; - -template -void axpy(size_t n, T a, T* x, T* y) -{ - rmm::cuda_stream_view stream_view; - thrust::transform(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n), - thrust::device_pointer_cast(y), - thrust::device_pointer_cast(y), - axpy_functor(a)); - RAFT_CHECK_CUDA(stream_view.value()); -} - -// norm -template -struct square { - __host__ __device__ T operator()(const T& x) const { return x * x; } -}; - -template -T nrm2(size_t n, T* x) -{ - rmm::cuda_stream_view stream_view; - T init = 0; - T result = std::sqrt(thrust::transform_reduce(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n), - square(), - init, - thrust::plus())); - RAFT_CHECK_CUDA(stream_view.value()); - return result; -} - -template -T nrm1(size_t n, T* x) -{ - rmm::cuda_stream_view stream_view; - T result = thrust::reduce(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n)); - RAFT_CHECK_CUDA(stream_view.value()); - return result; -} - -template -void scal(size_t n, T val, T* x) -{ - rmm::cuda_stream_view stream_view; - thrust::transform(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n), - thrust::make_constant_iterator(val), - thrust::device_pointer_cast(x), - thrust::multiplies()); - RAFT_CHECK_CUDA(stream_view.value()); -} - -template -void addv(size_t n, T val, T* x) -{ - rmm::cuda_stream_view stream_view; - thrust::transform(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n), - thrust::make_constant_iterator(val), - thrust::device_pointer_cast(x), - thrust::plus()); - RAFT_CHECK_CUDA(stream_view.value()); -} - -template -void fill(size_t n, T* x, T value) -{ - rmm::cuda_stream_view stream_view; - thrust::fill(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(x), - thrust::device_pointer_cast(x + n), - value); - RAFT_CHECK_CUDA(stream_view.value()); -} - -template -void scatter(size_t n, T* src, T* dst, M* map) -{ - rmm::cuda_stream_view stream_view; - thrust::scatter(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(src), - thrust::device_pointer_cast(src + n), - thrust::device_pointer_cast(map), - thrust::device_pointer_cast(dst)); - RAFT_CHECK_CUDA(stream_view.value()); -} - -template -void printv(size_t n, T* vec, int offset) -{ - thrust::device_ptr dev_ptr(vec); - std::cout.precision(15); - std::cout << "sample size = " << n << ", offset = " << offset << std::endl; - thrust::copy( - dev_ptr + offset, - dev_ptr + offset + n, - 
std::ostream_iterator( - std::cout, " ")); // Assume no RMM dependency; TODO: check / test (potential BUG !!!!!) - RAFT_CHECK_CUDA(nullptr); - std::cout << std::endl; -} - -template -void copy(size_t n, T* x, T* res) -{ - thrust::device_ptr dev_ptr(x); - thrust::device_ptr res_ptr(res); - rmm::cuda_stream_view stream_view; - thrust::copy_n(rmm::exec_policy(stream_view), dev_ptr, n, res_ptr); - RAFT_CHECK_CUDA(stream_view.value()); -} - -template -struct is_zero { - __host__ __device__ bool operator()(const T x) { return x == 0; } -}; - -template -struct dangling_functor : public thrust::unary_function { - const T val; - dangling_functor(T _val) : val(_val) {} - __host__ __device__ T operator()(const T& x) const { return val + x; } -}; - -template -void update_dangling_nodes(size_t n, T* dangling_nodes, T damping_factor) -{ - rmm::cuda_stream_view stream_view; - thrust::transform_if(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(dangling_nodes), - thrust::device_pointer_cast(dangling_nodes + n), - thrust::device_pointer_cast(dangling_nodes), - dangling_functor(1.0 - damping_factor), - is_zero()); - RAFT_CHECK_CUDA(stream_view.value()); -} - -// google matrix kernels -template -__global__ static void degree_coo(const IndexType n, - const IndexType e, - const IndexType* ind, - ValueType* degree) -{ - for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x) - atomicAdd(°ree[ind[i]], (ValueType)1.0); -} - -template -__global__ static void flag_leafs_kernel(const size_t n, - const IndexType* degree, - ValueType* bookmark) -{ - for (auto i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) - if (degree[i] == 0) bookmark[i] = 1.0; -} - -template -__global__ static void degree_offsets(const IndexType n, - const IndexType e, - const IndexType* ind, - ValueType* degree) -{ - for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) - degree[i] += ind[i + 1] - ind[i]; -} - -template -__global__ static void type_convert(FromType* array, int n) -{ - for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < n; i += gridDim.x * blockDim.x) { - ToType val = array[i]; - ToType* vals = (ToType*)array; - vals[i] = val; - } -} - -template -__global__ static void equi_prob3(const IndexType n, - const IndexType e, - const IndexType* csrPtr, - const IndexType* csrInd, - ValueType* val, - IndexType* degree) -{ - int j, row, col; - for (row = threadIdx.z + blockIdx.z * blockDim.z; row < n; row += gridDim.z * blockDim.z) { - for (j = csrPtr[row] + threadIdx.y + blockIdx.y * blockDim.y; j < csrPtr[row + 1]; - j += gridDim.y * blockDim.y) { - col = csrInd[j]; - val[j] = 1.0 / degree[col]; - // val[j] = 999; - } - } -} - -template -__global__ static void equi_prob2(const IndexType n, - const IndexType e, - const IndexType* csrPtr, - const IndexType* csrInd, - ValueType* val, - IndexType* degree) -{ - int row = blockIdx.x * blockDim.x + threadIdx.x; - if (row < n) { - int row_begin = csrPtr[row]; - int row_end = csrPtr[row + 1]; - int col; - for (int i = row_begin; i < row_end; i++) { - col = csrInd[i]; - val[i] = 1.0 / degree[col]; - } - } -} - -// compute the H^T values for an already transposed adjacency matrix, leveraging coo info -template -void HT_matrix_csc_coo(const IndexType n, - const IndexType e, - const IndexType* csrPtr, - const IndexType* csrInd, - ValueType* val, - ValueType* bookmark) -{ - rmm::cuda_stream_view stream_view; - rmm::device_uvector degree(n, stream_view); - - dim3 nthreads, nblocks; - 
nthreads.x = min(e, CUDA_MAX_KERNEL_THREADS); - nthreads.y = 1; - nthreads.z = 1; - nblocks.x = min((e + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS); - nblocks.y = 1; - nblocks.z = 1; - degree_coo - <<>>(n, e, csrInd, degree.data()); - RAFT_CHECK_CUDA(stream_view.value()); - - int y = 4; - nthreads.x = 32 / y; - nthreads.y = y; - nthreads.z = 8; - nblocks.x = 1; - nblocks.y = 1; - nblocks.z = min((n + nthreads.z - 1) / nthreads.z, CUDA_MAX_BLOCKS); // 1; - equi_prob3 - <<>>(n, e, csrPtr, csrInd, val, degree.data()); - RAFT_CHECK_CUDA(stream_view.value()); - - ValueType a = 0.0; - fill(n, bookmark, a); - RAFT_CHECK_CUDA(stream_view.value()); - - nthreads.x = min(n, CUDA_MAX_KERNEL_THREADS); - nthreads.y = 1; - nthreads.z = 1; - nblocks.x = min((n + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS); - nblocks.y = 1; - nblocks.z = 1; - flag_leafs_kernel - <<>>(n, degree.data(), bookmark); - RAFT_CHECK_CUDA(stream_view.value()); -} - -template -__attribute__((visibility("hidden"))) __global__ void offsets_to_indices_kernel( - const offsets_t* offsets, index_t v, index_t* indices) -{ - auto tid{threadIdx.x}; - auto ctaStart{blockIdx.x}; - - for (index_t j = ctaStart; j < v; j += gridDim.x) { - offsets_t colStart = offsets[j]; - offsets_t colEnd = offsets[j + 1]; - offsets_t rowNnz = colEnd - colStart; - - for (offsets_t i = 0; i < rowNnz; i += blockDim.x) { - if ((colStart + tid + i) < colEnd) { indices[colStart + tid + i] = j; } - } - } -} - -template -void offsets_to_indices(const offsets_t* offsets, index_t v, index_t* indices) -{ - cudaStream_t stream{nullptr}; - index_t nthreads = min(v, (index_t)CUDA_MAX_KERNEL_THREADS); - index_t nblocks = min((v + nthreads - 1) / nthreads, (index_t)CUDA_MAX_BLOCKS); - offsets_to_indices_kernel<<>>(offsets, v, indices); - RAFT_CHECK_CUDA(stream); -} - -template -void sequence(IndexType n, IndexType* vec, IndexType init = 0) -{ - thrust::sequence( - thrust::device, thrust::device_pointer_cast(vec), thrust::device_pointer_cast(vec + n), init); - RAFT_CHECK_CUDA(nullptr); -} - -template -bool has_negative_val(DistType* arr, size_t n) -{ - // custom kernel with boolean bitwise reduce may be - // faster. 
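The parallel_prefix_sum being deleted above is built on the classic warp-scan idiom: each lane pulls the value held j lanes below it with __shfl_up_sync and accumulates, doubling j every round, so a 32-wide inclusive prefix sum finishes in five steps with no shared memory or synchronization. The core of that idiom in isolation (assumes a full 32-thread warp; the kernel is a toy for illustration only):

#include <cstdio>

__device__ float warp_inclusive_scan(float v)
{
  // After round j, each lane holds the sum of its own value and the j values
  // immediately below it in the warp.
  for (int j = 1; j < 32; j *= 2) {
    float up = __shfl_up_sync(0xffffffff, v, j, 32);
    if ((threadIdx.x & 31) >= j) v += up;  // lanes below j have no source lane
  }
  return v;  // lane i now holds v_0 + ... + v_i
}

__global__ void demo()
{
  float v = warp_inclusive_scan(1.0f);
  if (threadIdx.x == 31) printf("sum over warp = %f\n", v);  // prints 32
}

With the move to Thrust and CUB device-wide scans elsewhere in this diff, hand-rolled warp scans like this one no longer need to live in the codebase.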
- rmm::cuda_stream_view stream_view; - DistType result = *thrust::min_element(rmm::exec_policy(stream_view), - thrust::device_pointer_cast(arr), - thrust::device_pointer_cast(arr + n)); - - RAFT_CHECK_CUDA(stream_view.value()); - - return (result < 0); -} - -} // namespace detail -} // namespace cugraph - -#include "eidecl_graph_utils.hpp" diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 52d257b9bea..3752e823659 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -39,6 +39,7 @@ add_library(cugraphtestutil STATIC utilities/misc_utilities.cpp utilities/conversion_utilities_sg.cu utilities/debug_utilities_sg.cpp + utilities/validation_utilities.cu link_prediction/similarity_compare.cpp centrality/betweenness_centrality_validate.cu community/egonet_validate.cu @@ -46,6 +47,7 @@ add_library(cugraphtestutil STATIC structure/induced_subgraph_validate.cu sampling/random_walks_check_sg.cu sampling/detail/nbr_sampling_validate.cu + sampling/detail/sampling_post_processing_validate.cu ../../thirdparty/mmio/mmio.c) target_compile_options(cugraphtestutil @@ -218,6 +220,7 @@ function(ConfigureTestMG CMAKE_TEST_NAME) GPUS ${GPU_COUNT} PERCENT 100 INSTALL_COMPONENT_SET testing_mg + INSTALL_TARGET ${CMAKE_TEST_NAME} ) set_tests_properties(${CMAKE_TEST_NAME} PROPERTIES LABELS "CUGRAPH_MG") @@ -300,6 +303,7 @@ function(ConfigureCTestMG CMAKE_TEST_NAME) GPUS ${GPU_COUNT} PERCENT 100 INSTALL_COMPONENT_SET testing_mg + INSTALL_TARGET ${CMAKE_TEST_NAME} ) set_tests_properties(${CMAKE_TEST_NAME} PROPERTIES LABELS "CUGRAPH_C_MG") @@ -485,7 +489,16 @@ ConfigureTest(BIASED_NEIGHBOR_SAMPLING_TEST sampling/biased_neighbor_sampling.cp ################################################################################################### # - SAMPLING_POST_PROCESSING tests ---------------------------------------------------------------- -ConfigureTest(SAMPLING_POST_PROCESSING_TEST sampling/sampling_post_processing_test.cu) +ConfigureTest(SAMPLING_POST_PROCESSING_TEST sampling/sampling_post_processing_test.cpp) + +################################################################################################### +# - SAMPLING_HETEROGENEOUS_POST_PROCESSING tests -------------------------------------------------- +ConfigureTest(SAMPLING_HETEROGENEOUS_POST_PROCESSING_TEST + sampling/sampling_heterogeneous_post_processing_test.cpp) + +################################################################################################### +# - NEGATIVE SAMPLING tests -------------------------------------------------------------------- +ConfigureTest(NEGATIVE_SAMPLING_TEST sampling/negative_sampling.cpp PERCENT 100) ################################################################################################### # - Renumber tests -------------------------------------------------------------------------------- @@ -576,7 +589,8 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG BETWEENNESS CENTRALITY tests ----------------------------------------------------------- ConfigureTestMG(MG_BETWEENNESS_CENTRALITY_TEST centrality/mg_betweenness_centrality_test.cpp) - ConfigureTestMG(MG_EDGE_BETWEENNESS_CENTRALITY_TEST centrality/mg_edge_betweenness_centrality_test.cpp) + ConfigureTestMG(MG_EDGE_BETWEENNESS_CENTRALITY_TEST + centrality/mg_edge_betweenness_centrality_test.cpp) ############################################################################################### # - MG BFS tests 
------------------------------------------------------------------------------ @@ -741,6 +755,11 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG BIASED NBR SAMPLING tests -------------------------------------------------------------- ConfigureTestMG(MG_BIASED_NEIGHBOR_SAMPLING_TEST sampling/mg_biased_neighbor_sampling.cpp) + ################################################################################################### + # - NEGATIVE SAMPLING tests -------------------------------------------------------------------- + ConfigureTestMG(MG_NEGATIVE_SAMPLING_TEST sampling/mg_negative_sampling.cpp) + + ############################################################################################### # - MG RANDOM_WALKS tests --------------------------------------------------------------------- ConfigureTestMG(MG_RANDOM_WALKS_TEST sampling/mg_random_walks_test.cpp) @@ -773,6 +792,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_HITS_TEST c_api/mg_hits_test.c) ConfigureCTestMG(MG_CAPI_UNIFORM_NEIGHBOR_SAMPLE_TEST c_api/mg_uniform_neighbor_sample_test.c) ConfigureCTestMG(MG_CAPI_BIASED_NEIGHBOR_SAMPLE_TEST c_api/mg_biased_neighbor_sample_test.c) + ConfigureCTestMG(MG_CAPI_NEGATIVE_SAMPLING_TEST c_api/mg_negative_sampling_test.c) ConfigureCTestMG(MG_CAPI_LOOKUP_SRC_DST_TEST c_api/mg_lookup_src_dst_test.c) ConfigureCTestMG(MG_CAPI_RANDOM_WALKS_TEST c_api/mg_random_walks_test.c) ConfigureCTestMG(MG_CAPI_TRIANGLE_COUNT_TEST c_api/mg_triangle_count_test.c) @@ -812,6 +832,7 @@ ConfigureCTest(CAPI_WEAKLY_CONNECTED_COMPONENTS_TEST c_api/weakly_connected_comp ConfigureCTest(CAPI_STRONGLY_CONNECTED_COMPONENTS_TEST c_api/strongly_connected_components_test.c) ConfigureCTest(CAPI_UNIFORM_NEIGHBOR_SAMPLE_TEST c_api/uniform_neighbor_sample_test.c) ConfigureCTest(CAPI_BIASED_NEIGHBOR_SAMPLE_TEST c_api/biased_neighbor_sample_test.c) +ConfigureCTest(CAPI_NEGATIVE_SAMPLING_TEST c_api/negative_sampling_test.c) ConfigureCTest(CAPI_RANDOM_WALKS_TEST c_api/sg_random_walks_test.c) ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c) ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c) diff --git a/cpp/tests/c_api/mg_negative_sampling_test.c b/cpp/tests/c_api/mg_negative_sampling_test.c new file mode 100644 index 00000000000..3289206d8db --- /dev/null +++ b/cpp/tests/c_api/mg_negative_sampling_test.c @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mg_test_utils.h" /* RUN_MG_TEST */ + +#include +#include + +#include +#include +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +data_type_id_t vertex_tid = INT32; +data_type_id_t edge_tid = INT32; +data_type_id_t weight_tid = FLOAT32; +data_type_id_t edge_id_tid = INT32; +data_type_id_t edge_type_tid = INT32; + +int generic_negative_sampling_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + size_t num_vertices, + size_t num_edges, + size_t num_samples, + vertex_t* h_vertices, + weight_t* h_src_bias, + weight_t* h_dst_bias, + size_t num_biases, + bool_t remove_duplicates, + bool_t remove_false_negatives, + bool_t exact_number_of_samples) +{ + // Create graph + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + cugraph_graph_t* graph = NULL; + cugraph_coo_t* result = NULL; + + ret_code = create_mg_test_graph_new(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + NULL, + edge_type_tid, + NULL, + edge_id_tid, + NULL, + num_edges, + FALSE, + TRUE, + FALSE, + FALSE, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_type_erased_device_array_t* d_vertices = NULL; + cugraph_type_erased_device_array_view_t* d_vertices_view = NULL; + cugraph_type_erased_device_array_t* d_src_bias = NULL; + cugraph_type_erased_device_array_view_t* d_src_bias_view = NULL; + cugraph_type_erased_device_array_t* d_dst_bias = NULL; + cugraph_type_erased_device_array_view_t* d_dst_bias_view = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + if (num_biases > 0) { + if (rank == 0) { + ret_code = cugraph_type_erased_device_array_create( + handle, num_biases, vertex_tid, &d_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_vertices create failed."); + + d_vertices_view = cugraph_type_erased_device_array_view(d_vertices); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_vertices_view, (byte_t*)h_vertices, &ret_error); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_biases, weight_tid, &d_src_bias, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_src_bias create failed."); + + d_src_bias_view = cugraph_type_erased_device_array_view(d_src_bias); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_src_bias_view, (byte_t*)h_src_bias, &ret_error); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_biases, weight_tid, &d_dst_bias, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_dst_bias create failed."); + + d_dst_bias_view = cugraph_type_erased_device_array_view(d_dst_bias); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_dst_bias_view, (byte_t*)h_dst_bias, &ret_error); + } else { + d_vertices_view = cugraph_type_erased_device_array_view_create(NULL, 0, vertex_tid); + d_src_bias_view = cugraph_type_erased_device_array_view_create(NULL, 0, weight_tid); + d_dst_bias_view = cugraph_type_erased_device_array_view_create(NULL, 0, weight_tid); + } + } + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + + ret_code = cugraph_negative_sampling(handle, + rng_state, + graph, + d_vertices_view, + d_src_bias_view, + 
d_dst_bias_view, + num_samples, + remove_duplicates, + remove_false_negatives, + exact_number_of_samples, + FALSE, + &result, + &ret_error); + + cugraph_type_erased_device_array_view_t* result_srcs = NULL; + cugraph_type_erased_device_array_view_t* result_dsts = NULL; + + result_srcs = cugraph_coo_get_sources(result); + result_dsts = cugraph_coo_get_destinations(result); + + size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs); + + vertex_t h_result_srcs[result_size]; + vertex_t h_result_dsts[result_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_srcs, result_srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_dsts, result_dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + // First, check that all edges are actually part of the graph + int32_t M_exists[num_vertices][num_vertices]; + int32_t M_duplicates[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M_exists[i][j] = 0; + M_duplicates[i][j] = 0; + } + + for (int i = 0; i < num_edges; ++i) { + M_exists[h_src[i]][h_dst[i]] = 1; + } + + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + (h_result_srcs[i] >= 0) && (h_result_srcs[i] < num_vertices), + "negative_sampling generated an edge that with an invalid vertex"); + TEST_ASSERT(test_ret_value, + (h_result_dsts[i] >= 0) && (h_result_dsts[i] < num_vertices), + "negative_sampling generated an edge that with an invalid vertex"); + if (remove_false_negatives == TRUE) { + TEST_ASSERT(test_ret_value, + M_exists[h_result_srcs[i]][h_result_dsts[i]] == 0, + "negative_sampling generated a false negative edge that should be suppressed"); + } + + if (remove_duplicates == TRUE) { + TEST_ASSERT(test_ret_value, + M_duplicates[h_result_srcs[i]][h_result_dsts[i]] == 0, + "negative_sampling generated a duplicate edge that should be suppressed"); + M_duplicates[h_result_srcs[i]][h_result_dsts[i]] = 1; + } + } + + if (exact_number_of_samples == TRUE) + TEST_ASSERT(test_ret_value, + result_size == num_samples, + "negative_sampling generated a result with an incorrect number of samples"); + + cugraph_type_erased_device_array_view_free(d_vertices_view); + cugraph_type_erased_device_array_view_free(d_src_bias_view); + cugraph_type_erased_device_array_view_free(d_dst_bias_view); + cugraph_type_erased_device_array_free(d_vertices); + cugraph_type_erased_device_array_free(d_src_bias); + cugraph_type_erased_device_array_free(d_dst_bias); + cugraph_coo_free(result); + cugraph_mg_graph_free(graph); + cugraph_error_free(ret_error); + return test_ret_value; +} + +int test_negative_sampling_uniform(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t num_biases = 0; + size_t num_samples = 10; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + + bool_t remove_duplicates = FALSE; + bool_t remove_false_negatives = TRUE; + bool_t exact_number_of_samples = FALSE; + + return generic_negative_sampling_test(handle, + src, + dst, + num_vertices, + num_edges, + num_samples, + NULL, + NULL, + NULL, + num_biases, + remove_duplicates, + 
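The checks above build dense num_vertices x num_vertices existence and duplicate matrices, which is fine for these six-vertex fixtures but quadratic in general. For larger fixtures, the same false-negative check can be sketched with a hash set keyed on packed (src, dst) pairs (a C++ sketch assuming 32-bit vertex IDs; the actual tests are plain C):

#include <cstdint>
#include <unordered_set>
#include <vector>

using vertex_t = int32_t;

// Packs (src, dst) into one 64-bit key; assumes vertex IDs fit in 32 bits.
inline uint64_t edge_key(vertex_t s, vertex_t d)
{
  return (static_cast<uint64_t>(static_cast<uint32_t>(s)) << 32) |
         static_cast<uint32_t>(d);
}

// Returns true iff no sampled edge also exists in the graph.
bool check_no_false_negatives(std::vector<vertex_t> const& graph_srcs,
                              std::vector<vertex_t> const& graph_dsts,
                              std::vector<vertex_t> const& sample_srcs,
                              std::vector<vertex_t> const& sample_dsts)
{
  std::unordered_set<uint64_t> existing{};
  for (size_t i = 0; i < graph_srcs.size(); ++i) {
    existing.insert(edge_key(graph_srcs[i], graph_dsts[i]));
  }
  for (size_t i = 0; i < sample_srcs.size(); ++i) {
    if (existing.count(edge_key(sample_srcs[i], sample_dsts[i])) > 0) { return false; }
  }
  return true;
}

The duplicate check can reuse the same structure: insert each sampled edge as it is visited and fail if the insertion reports the key was already present.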
remove_false_negatives, + exact_number_of_samples); +} + +int test_negative_sampling_biased(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t num_biases = 6; + size_t num_samples = 10; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + weight_t src_bias[] = {1, 1, 2, 2, 1, 1}; + weight_t dst_bias[] = {2, 2, 1, 1, 1, 1}; + vertex_t vertices[] = {0, 1, 2, 3, 4, 5}; + + bool_t remove_duplicates = FALSE; + bool_t remove_false_negatives = TRUE; + bool_t exact_number_of_samples = FALSE; + + return generic_negative_sampling_test(handle, + src, + dst, + num_vertices, + num_edges, + num_samples, + vertices, + src_bias, + dst_bias, + num_biases, + remove_duplicates, + remove_false_negatives, + exact_number_of_samples); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_negative_sampling_uniform, handle); + result |= RUN_MG_TEST(test_negative_sampling_biased, handle); + + cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} diff --git a/cpp/tests/c_api/negative_sampling_test.c b/cpp/tests/c_api/negative_sampling_test.c new file mode 100644 index 00000000000..5e8d3f7e765 --- /dev/null +++ b/cpp/tests/c_api/negative_sampling_test.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include +#include +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +data_type_id_t vertex_tid = INT32; +data_type_id_t edge_tid = INT32; +data_type_id_t weight_tid = FLOAT32; +data_type_id_t edge_id_tid = INT32; +data_type_id_t edge_type_tid = INT32; + +int generic_negative_sampling_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + size_t num_vertices, + size_t num_edges, + size_t num_samples, + vertex_t* h_vertices, + weight_t* h_src_bias, + weight_t* h_dst_bias, + size_t num_biases, + bool_t remove_duplicates, + bool_t remove_false_negatives, + bool_t exact_number_of_samples) +{ + // Create graph + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + cugraph_graph_t* graph = NULL; + cugraph_coo_t* result = NULL; + + ret_code = create_sg_test_graph(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + NULL, + edge_type_tid, + NULL, + edge_id_tid, + NULL, + num_edges, + FALSE, + TRUE, + FALSE, + FALSE, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_type_erased_device_array_t* d_vertices = NULL; + cugraph_type_erased_device_array_view_t* d_vertices_view = NULL; + cugraph_type_erased_device_array_t* d_src_bias = NULL; + cugraph_type_erased_device_array_view_t* d_src_bias_view = NULL; + cugraph_type_erased_device_array_t* d_dst_bias = NULL; + cugraph_type_erased_device_array_view_t* d_dst_bias_view = NULL; + + if (num_biases > 0) { + ret_code = cugraph_type_erased_device_array_create( + handle, num_biases, vertex_tid, &d_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_vertices create failed."); + + d_vertices_view = cugraph_type_erased_device_array_view(d_vertices); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_vertices_view, (byte_t*)h_vertices, &ret_error); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_biases, weight_tid, &d_src_bias, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_src_bias create failed."); + + d_src_bias_view = cugraph_type_erased_device_array_view(d_src_bias); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_src_bias_view, (byte_t*)h_src_bias, &ret_error); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_biases, weight_tid, &d_dst_bias, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_dst_bias create failed."); + + d_dst_bias_view = cugraph_type_erased_device_array_view(d_dst_bias); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_dst_bias_view, (byte_t*)h_dst_bias, &ret_error); + } + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + + ret_code = cugraph_negative_sampling(handle, + rng_state, + graph, + d_vertices_view, + d_src_bias_view, + d_dst_bias_view, + num_samples, + remove_duplicates, + remove_false_negatives, + exact_number_of_samples, + FALSE, + &result, + &ret_error); + + cugraph_type_erased_device_array_view_t* result_srcs = NULL; + cugraph_type_erased_device_array_view_t* result_dsts = NULL; + + result_srcs = cugraph_coo_get_sources(result); + result_dsts = 
cugraph_coo_get_destinations(result); + + size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs); + + vertex_t h_result_srcs[result_size]; + vertex_t h_result_dsts[result_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_srcs, result_srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_dsts, result_dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + // First, check that all edges are actually part of the graph + int32_t M_exists[num_vertices][num_vertices]; + int32_t M_duplicates[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M_exists[i][j] = 0; + M_duplicates[i][j] = 0; + } + + for (int i = 0; i < num_edges; ++i) { + M_exists[h_src[i]][h_dst[i]] = 1; + } + + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + (h_result_srcs[i] >= 0) && (h_result_srcs[i] < num_vertices), + "negative_sampling generated an edge that with an invalid vertex"); + TEST_ASSERT(test_ret_value, + (h_result_dsts[i] >= 0) && (h_result_dsts[i] < num_vertices), + "negative_sampling generated an edge that with an invalid vertex"); + if (remove_false_negatives == TRUE) { + TEST_ASSERT(test_ret_value, + M_exists[h_result_srcs[i]][h_result_dsts[i]] == 0, + "negative_sampling generated a false negative edge that should be suppressed"); + } + + if (remove_duplicates == TRUE) { + TEST_ASSERT(test_ret_value, + M_duplicates[h_result_srcs[i]][h_result_dsts[i]] == 0, + "negative_sampling generated a duplicate edge that should be suppressed"); + M_duplicates[h_result_srcs[i]][h_result_dsts[i]] = 1; + } + } + + if (exact_number_of_samples == TRUE) + TEST_ASSERT(test_ret_value, + result_size == num_samples, + "negative_sampling generated a result with an incorrect number of samples"); + + cugraph_type_erased_device_array_view_free(d_vertices_view); + cugraph_type_erased_device_array_view_free(d_src_bias_view); + cugraph_type_erased_device_array_view_free(d_dst_bias_view); + cugraph_type_erased_device_array_free(d_vertices); + cugraph_type_erased_device_array_free(d_src_bias); + cugraph_coo_free(result); + cugraph_sg_graph_free(graph); + cugraph_error_free(ret_error); + return test_ret_value; +} + +int test_negative_sampling_uniform(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t num_biases = 0; + size_t num_samples = 10; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + + bool_t remove_duplicates = FALSE; + bool_t remove_false_negatives = TRUE; + bool_t exact_number_of_samples = FALSE; + + return generic_negative_sampling_test(handle, + src, + dst, + num_vertices, + num_edges, + num_samples, + NULL, + NULL, + NULL, + num_biases, + remove_duplicates, + remove_false_negatives, + exact_number_of_samples); +} + +int test_negative_sampling_biased(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t num_biases = 6; + size_t num_samples = 10; + + vertex_t src[] = {0, 0, 1, 1, 2, 
2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + weight_t src_bias[] = {1, 1, 2, 2, 1, 1}; + weight_t dst_bias[] = {2, 2, 1, 1, 1, 1}; + vertex_t vertices[] = {0, 1, 2, 3, 4, 5}; + + bool_t remove_duplicates = FALSE; + bool_t remove_false_negatives = TRUE; + bool_t exact_number_of_samples = FALSE; + + return generic_negative_sampling_test(handle, + src, + dst, + num_vertices, + num_edges, + num_samples, + vertices, + src_bias, + dst_bias, + num_biases, + remove_duplicates, + remove_false_negatives, + exact_number_of_samples); +} + +int main(int argc, char** argv) +{ + cugraph_resource_handle_t* handle = NULL; + + handle = cugraph_create_resource_handle(NULL); + + int result = 0; + result |= RUN_TEST_NEW(test_negative_sampling_uniform, handle); + result |= RUN_TEST_NEW(test_negative_sampling_biased, handle); + + cugraph_free_resource_handle(handle); + + return result; +} diff --git a/cpp/tests/c_api/sg_random_walks_test.c b/cpp/tests/c_api/sg_random_walks_test.c index 14108d91c04..a4a77b5775a 100644 --- a/cpp/tests/c_api/sg_random_walks_test.c +++ b/cpp/tests/c_api/sg_random_walks_test.c @@ -192,9 +192,6 @@ int generic_biased_random_walks_test(vertex_t* h_src, ret_code = cugraph_biased_random_walks(handle, graph, d_start_view, max_depth, &result, &ret_error); -#if 1 - TEST_ASSERT(test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_random_walks should have failed") -#else TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_random_walks failed."); @@ -208,10 +205,10 @@ int generic_biased_random_walks_test(vertex_t* h_src, size_t wgts_size = cugraph_type_erased_device_array_view_size(wgts); vertex_t h_result_verts[verts_size]; - vertex_t h_result_wgts[wgts_size]; + weight_t h_result_wgts[wgts_size]; - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_verts, verts, &ret_error); + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_verts, verts, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( @@ -231,23 +228,35 @@ int generic_biased_random_walks_test(vertex_t* h_src, M[h_src[i]][h_dst[i]] = h_wgt[i]; TEST_ASSERT(test_ret_value, - cugraph_random_walk_result_get_max_path_length() == max_depth, + cugraph_random_walk_result_get_max_path_length(result) == max_depth, "path length does not match"); for (int i = 0; (i < num_starts) && (test_ret_value == 0); ++i) { - TEST_ASSERT(test_ret_value, - M[h_start[i]][h_result_verts[i * (max_depth + 1)]] == h_result_wgts[i * max_depth], - "biased_random_walks got edge that doesn't exist"); - for (size_t j = 1; j < cugraph_random_walk_result_get_max_path_length(); ++j) - TEST_ASSERT( - test_ret_value, - M[h_start[i * (max_depth + 1) + j - 1]][h_result_verts[i * (max_depth + 1) + j]] == - h_result_wgts[i * max_depth + j - 1], - "biased_random_walks got edge that doesn't exist"); + TEST_ASSERT( + test_ret_value, h_start[i] == h_result_verts[i * (max_depth + 1)], "start of path not found"); + for (size_t j = 0; j < max_depth; ++j) { + int src_index = i * (max_depth + 1) + j; + int dst_index = src_index + 1; + if (h_result_verts[dst_index] < 0) { + if (h_result_verts[src_index] >= 0) { + int departing_count = 0; + for (int k = 0; k < num_vertices; ++k) { + if (M[h_result_verts[src_index]][k] >= 0) departing_count++; + } + TEST_ASSERT(test_ret_value, + departing_count == 
0, + "biased_random_walks found no edge when an edge exists"); + } + } else { + TEST_ASSERT(test_ret_value, + M[h_result_verts[src_index]][h_result_verts[dst_index]] == + h_result_wgts[i * max_depth + j], + "biased_random_walks got edge that doesn't exist"); + } + } } cugraph_random_walk_result_free(result); -#endif cugraph_sg_graph_free(graph); cugraph_free_resource_handle(handle); @@ -302,10 +311,6 @@ int generic_node2vec_random_walks_test(vertex_t* h_src, ret_code = cugraph_node2vec_random_walks( handle, graph, d_start_view, max_depth, p, q, &result, &ret_error); -#if 1 - TEST_ASSERT( - test_ret_value, ret_code != CUGRAPH_SUCCESS, "node2vec_random_walks should have failed") -#else TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "node2vec_random_walks failed."); @@ -319,10 +324,10 @@ int generic_node2vec_random_walks_test(vertex_t* h_src, size_t wgts_size = cugraph_type_erased_device_array_view_size(wgts); vertex_t h_result_verts[verts_size]; - vertex_t h_result_wgts[wgts_size]; + weight_t h_result_wgts[wgts_size]; - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_verts, verts, &ret_error); + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_verts, verts, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); ret_code = cugraph_type_erased_device_array_view_copy_to_host( @@ -342,23 +347,35 @@ int generic_node2vec_random_walks_test(vertex_t* h_src, M[h_src[i]][h_dst[i]] = h_wgt[i]; TEST_ASSERT(test_ret_value, - cugraph_random_walk_result_get_max_path_length() == max_depth, + cugraph_random_walk_result_get_max_path_length(result) == max_depth, "path length does not match"); for (int i = 0; (i < num_starts) && (test_ret_value == 0); ++i) { - TEST_ASSERT(test_ret_value, - M[h_start[i]][h_result_verts[i * (max_depth + 1)]] == h_result_wgts[i * max_depth], - "node2vec_random_walks got edge that doesn't exist"); - for (size_t j = 1; j < max_depth; ++j) - TEST_ASSERT( - test_ret_value, - M[h_start[i * (max_depth + 1) + j - 1]][h_result_verts[i * (max_depth + 1) + j]] == - h_result_wgts[i * max_depth + j - 1], - "node2vec_random_walks got edge that doesn't exist"); + TEST_ASSERT( + test_ret_value, h_start[i] == h_result_verts[i * (max_depth + 1)], "start of path not found"); + for (size_t j = 0; j < max_depth; ++j) { + int src_index = i * (max_depth + 1) + j; + int dst_index = src_index + 1; + if (h_result_verts[dst_index] < 0) { + if (h_result_verts[src_index] >= 0) { + int departing_count = 0; + for (int k = 0; k < num_vertices; ++k) { + if (M[h_result_verts[src_index]][k] >= 0) departing_count++; + } + TEST_ASSERT(test_ret_value, + departing_count == 0, + "node2vec_random_walks found no edge when an edge exists"); + } + } else { + TEST_ASSERT(test_ret_value, + M[h_result_verts[src_index]][h_result_verts[dst_index]] == + h_result_wgts[i * max_depth + j], + "node2vec_random_walks got edge that doesn't exist"); + } + } } cugraph_random_walk_result_free(result); -#endif cugraph_sg_graph_free(graph); cugraph_free_resource_handle(handle); @@ -390,7 +407,7 @@ int test_biased_random_walks() vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; - weight_t wgt[] = {0, 1, 2, 3, 4, 5, 6, 7}; + weight_t wgt[] = {1, 2, 3, 4, 5, 6, 7, 8}; vertex_t start[] = {2, 2}; return generic_biased_random_walks_test( diff --git a/cpp/tests/mtmg/multi_node_threaded_test.cu 
b/cpp/tests/mtmg/multi_node_threaded_test.cu index 06ccd4a7fa1..374c432aac5 100644 --- a/cpp/tests/mtmg/multi_node_threaded_test.cu +++ b/cpp/tests/mtmg/multi_node_threaded_test.cu @@ -39,6 +39,7 @@ #include #include +#include #include #include diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index d77d8a7659e..f698701eb08 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -15,6 +15,7 @@ */ #include "prims/per_v_random_select_transform_outgoing_e.cuh" +#include "prims/transform_e.cuh" #include "prims/vertex_frontier.cuh" #include "utilities/base_fixture.hpp" #include "utilities/conversion_utilities.hpp" @@ -103,6 +104,7 @@ struct Prims_Usecase { bool with_replacement{false}; bool use_invalid_value{false}; bool use_weight_as_bias{false}; + bool inject_zero_bias{false}; // valid only when use_weight_as_bias is true bool edge_masking{false}; bool check_correctness{true}; }; @@ -159,6 +161,23 @@ class Tests_MGPerVRandomSelectTransformOutgoingE mg_graph_view.attach_edge_mask((*edge_mask).view()); } + if (mg_edge_weight_view && prims_usecase.inject_zero_bias) { + cugraph::transform_e( + *handle_, + mg_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + *mg_edge_weight_view, + [] __device__(auto src, auto dst, auto, auto, auto w) { + if ((src % 2) == 0 && (dst % 2) == 0) { + return weight_t{0.0}; + } else { + return w; + } + }, + (*mg_edge_weights).mutable_view()); + } + // 2. run MG per_v_random_select_transform_outgoing_e primitive const int hash_bin_count = 5; @@ -324,11 +343,14 @@ class Tests_MGPerVRandomSelectTransformOutgoingE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + std::optional< + cugraph::edge_property_t, weight_t>> + sg_edge_weights{std::nullopt}; + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, - std::optional>{std::nullopt}, + mg_edge_weight_view, std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), @@ -347,6 +369,8 @@ class Tests_MGPerVRandomSelectTransformOutgoingE } auto sg_graph_view = sg_graph.view(); + auto sg_edge_weight_view = + sg_edge_weights ? 
std::make_optional((*sg_edge_weights).view()) : std::nullopt; rmm::device_uvector sg_offsets(sg_graph_view.number_of_vertices() + vertex_t{1}, handle_->get_stream()); @@ -361,6 +385,17 @@ class Tests_MGPerVRandomSelectTransformOutgoingE sg_graph_view.local_edge_partition_view().indices().end(), sg_indices.begin()); + std::optional> sg_biases{std::nullopt}; + if (sg_edge_weight_view) { + auto firsts = (*sg_edge_weight_view).value_firsts(); + auto counts = (*sg_edge_weight_view).edge_counts(); + assert(firsts.size() == 1); + assert(counts.size() == 1); + sg_biases = rmm::device_uvector(counts[0], handle_->get_stream()); + thrust::copy( + handle_->get_thrust_policy(), firsts[0], firsts[0] + counts[0], (*sg_biases).begin()); + } + auto num_invalids = static_cast(thrust::count_if( handle_->get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), @@ -371,9 +406,10 @@ class Tests_MGPerVRandomSelectTransformOutgoingE : thrust::nullopt, sample_e_op_result_first = cugraph::get_dataframe_buffer_begin(mg_aggregate_sample_e_op_results), - sg_offsets = sg_offsets.begin(), - sg_indices = sg_indices.begin(), - K = prims_usecase.K, + sg_offsets = sg_offsets.begin(), + sg_indices = sg_indices.begin(), + sg_biases = sg_biases ? thrust::make_optional((*sg_biases).begin()) : thrust::nullopt, + K = prims_usecase.K, with_replacement = prims_usecase.with_replacement, invalid_value = invalid_value ? thrust::make_optional(*invalid_value) : thrust::nullopt, @@ -402,6 +438,12 @@ class Tests_MGPerVRandomSelectTransformOutgoingE auto count = offset_last - offset_first; auto out_degree = *(sg_offsets + v + 1) - *(sg_offsets + v); + if (sg_biases) { + out_degree = thrust::count_if(thrust::seq, + *sg_biases + *(sg_offsets + v), + *sg_biases + *(sg_offsets + v + 1), + [] __device__(auto bias) { return bias > 0.0; }); + } if (with_replacement) { if ((out_degree > 0 && count != K) || (out_degree == 0 && count != 0)) { return true; @@ -418,12 +460,33 @@ class Tests_MGPerVRandomSelectTransformOutgoingE auto sg_dst = thrust::get<1>(e_op_result); auto sg_nbr_first = sg_indices + *(sg_offsets + sg_src); auto sg_nbr_last = sg_indices + *(sg_offsets + (sg_src + vertex_t{1})); - if (!thrust::binary_search(thrust::seq, - sg_nbr_first, - sg_nbr_last, - sg_dst)) { // assumed neighbor lists are sorted - return true; + auto sg_nbr_bias_first = + sg_biases ? 
thrust::make_optional((*sg_biases) + *(sg_offsets + sg_src)) + : thrust::nullopt; + if (sg_src != v) { return true; } + + if (sg_nbr_bias_first) { + auto lower_it = thrust::lower_bound(thrust::seq, sg_nbr_first, sg_nbr_last, sg_dst); + auto upper_it = thrust::upper_bound(thrust::seq, sg_nbr_first, sg_nbr_last, sg_dst); + bool found = false; + for (auto it = (*sg_nbr_bias_first + thrust::distance(sg_nbr_first, lower_it)); + it != (*sg_nbr_bias_first + thrust::distance(sg_nbr_first, upper_it)); + ++it) { + if (*it > 0.0) { + found = true; + break; + } + } + if (!found) { return true; } + } else { + if (!thrust::binary_search(thrust::seq, + sg_nbr_first, + sg_nbr_last, + sg_dst)) { // assumed neighbor lists are sorted + return true; + } } + property_t src_val{}; property_t dst_val{}; if constexpr (cugraph::is_thrust_tuple_of_arithmetic::value) { @@ -443,20 +506,25 @@ class Tests_MGPerVRandomSelectTransformOutgoingE thrust::get<1>(sample_e_op_result_first.get_iterator_tuple()) + offset_first; auto sg_dst_last = thrust::get<1>(sample_e_op_result_first.get_iterator_tuple()) + offset_last; - auto dst_count = - thrust::count(thrust::seq, - sg_dst_first, - sg_dst_last, - sg_dst); // this could be inefficient for high-degree vertices, if - // we sort [sg_dst_first, sg_dst_last) we can use binary - // search but we may better not modify the sampling output - // and allow inefficiency as this is just for testing - auto multiplicity = thrust::distance( - thrust::lower_bound(thrust::seq, sg_nbr_first, sg_nbr_last, sg_dst), + auto dst_count = thrust::count(thrust::seq, sg_dst_first, sg_dst_last, sg_dst); + auto lower_it = + thrust::lower_bound(thrust::seq, + sg_nbr_first, + sg_nbr_last, + sg_dst); // this assumes neighbor lists are sorted + auto upper_it = thrust::upper_bound(thrust::seq, sg_nbr_first, sg_nbr_last, - sg_dst)); // this assumes neighbor lists are sorted + sg_dst); // this assumes neighbor lists are sorted + auto multiplicity = + sg_nbr_bias_first + ? 
thrust::count_if( + thrust::seq, + *sg_nbr_bias_first + thrust::distance(sg_nbr_first, lower_it), + *sg_nbr_bias_first + thrust::distance(sg_nbr_first, upper_it), + [] __device__(auto bias) { return bias > 0.0; }) + : thrust::distance(lower_it, upper_it); if (dst_count > multiplicity) { return true; } } } @@ -547,44 +615,60 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVRandomSelectTransformOutgoingE_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true}), + ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( file_large_test, Tests_MGPerVRandomSelectTransformOutgoingE_File, ::testing::Combine( - 
::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true}), + ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); @@ -593,22 +677,30 @@ INSTANTIATE_TEST_SUITE_P( rmat_small_test, Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true}, - 
Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, true}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false}, - Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true}), + ::testing::Values(Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, false, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, false, true, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, false, true, true, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, false, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true, false}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, false, true}, + Prims_Usecase{size_t{1000}, size_t{4}, true, true, true, true, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( @@ -620,22 +712,30 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGPerVRandomSelectTransformOutgoingE_Rmat, ::testing::Combine( ::testing::Values( - Prims_Usecase{size_t{10000000}, size_t{25}, false, false, false, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, false, false, true, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, false, true, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, false, true, true, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, true, false, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, true, false, true, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, true, true, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, false, true, true, true, false}, - Prims_Usecase{size_t{10000000}, 
size_t{25}, true, false, false, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, false, false, true, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, false, true, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, false, true, true, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, true, false, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, true, false, true, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, true, true, false, false}, - Prims_Usecase{size_t{10000000}, size_t{25}, true, true, true, true, false}), + Prims_Usecase{size_t{10000000}, size_t{25}, false, false, false, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, false, false, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, false, true, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, false, true, true, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, false, true, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, false, true, true, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, false, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, false, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, true, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, true, true, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, true, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, false, true, true, true, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, false, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, false, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, true, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, true, true, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, true, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, false, true, true, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, false, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, false, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, true, false, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, true, true, false, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, true, false, true, false}, + Prims_Usecase{size_t{10000000}, size_t{25}, true, true, true, true, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/detail/nbr_sampling_validate.cu b/cpp/tests/sampling/detail/nbr_sampling_validate.cu index 61731e2e15c..70828e559f1 100644 --- a/cpp/tests/sampling/detail/nbr_sampling_validate.cu +++ b/cpp/tests/sampling/detail/nbr_sampling_validate.cu @@ -75,6 +75,8 @@ struct ArithmeticZipLess { } else { return thrust::get<1>(left) < thrust::get<1>(right); } + } else { + return false; } } }; diff --git a/cpp/tests/sampling/detail/sampling_post_processing_validate.cu b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu new file mode 100644 index 00000000000..a0babc3b921 --- /dev/null +++ b/cpp/tests/sampling/detail/sampling_post_processing_validate.cu @@ -0,0 +1,1738 @@ +/* + * Copyright (c) 2022-2024, NVIDIA 
CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+template <typename index_t>
+bool check_offsets(raft::handle_t const& handle,
+                   raft::device_span<index_t const> offsets,
+                   index_t num_segments,
+                   index_t num_elements)
+{
+  if (offsets.size() != num_segments + 1) { return false; }
+
+  if (!thrust::is_sorted(handle.get_thrust_policy(), offsets.begin(), offsets.end())) {
+    return false;
+  }
+
+  index_t front_element{};
+  index_t back_element{};
+  raft::update_host(&front_element, offsets.data(), index_t{1}, handle.get_stream());
+  raft::update_host(
+    &back_element, offsets.data() + offsets.size() - 1, index_t{1}, handle.get_stream());
+  handle.sync_stream();
+
+  if (front_element != index_t{0}) { return false; }
+
+  if (back_element != num_elements) { return false; }
+
+  return true;
+}
+
+template bool check_offsets<size_t>(raft::handle_t const& handle,
+                                    raft::device_span<size_t const> offsets,
+                                    size_t num_segments,
+                                    size_t num_elements);
+
+template <typename vertex_t>
+bool check_edgelist_is_sorted(raft::handle_t const& handle,
+                              raft::device_span<vertex_t const> edgelist_majors,
+                              raft::device_span<vertex_t const> edgelist_minors)
+{
+  auto edge_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin());
+  return thrust::is_sorted(
+    handle.get_thrust_policy(), edge_first, edge_first + edgelist_majors.size());
+}
+
+template bool check_edgelist_is_sorted<int32_t>(
+  raft::handle_t const& handle,
+  raft::device_span<int32_t const> edgelist_majors,
+  raft::device_span<int32_t const> edgelist_minors);
+
+template bool check_edgelist_is_sorted<int64_t>(
+  raft::handle_t const& handle,
+  raft::device_span<int64_t const> edgelist_majors,
+  raft::device_span<int64_t const> edgelist_minors);
+
+// unrenumber the renumbered edge list and check whether the original & unrenumbered edge lists
+// are identical
+template <typename vertex_t, typename weight_t>
+bool compare_edgelist(raft::handle_t const& handle,
+                      raft::device_span<vertex_t const> org_edgelist_srcs,
+                      raft::device_span<vertex_t const> org_edgelist_dsts,
+                      std::optional<raft::device_span<weight_t const>> org_edgelist_weights,
+                      std::optional<raft::device_span<size_t const>> org_edgelist_label_offsets,
+                      raft::device_span<vertex_t const> renumbered_edgelist_srcs,
+                      raft::device_span<vertex_t const> renumbered_edgelist_dsts,
+                      std::optional<raft::device_span<weight_t const>> renumbered_edgelist_weights,
+                      std::optional<raft::device_span<vertex_t const>> renumber_map,
+                      std::optional<raft::device_span<size_t const>> renumber_map_label_offsets,
+                      size_t num_labels)
+{
+  if (org_edgelist_srcs.size() != renumbered_edgelist_srcs.size()) { return false; }
+
+  for (size_t i = 0; i < num_labels; ++i) {
+    size_t label_start_offset{0};
+    size_t label_end_offset = org_edgelist_srcs.size();
+    if (org_edgelist_label_offsets) {
+      raft::update_host(&label_start_offset,
+                        (*org_edgelist_label_offsets).data() + i,
+                        size_t{1},
+                        handle.get_stream());
+      raft::update_host(&label_end_offset,
+                        (*org_edgelist_label_offsets).data() + i + 1,
+                        size_t{1},
+                        handle.get_stream());
+      handle.sync_stream();
+    }
+
+    if (label_start_offset == label_end_offset) { continue; }
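+    // For intuition (hypothetical numbers): with 2 labels over 7 sampled
+    // edges, org_edgelist_label_offsets would look like {0, 3, 7}, so label 0
+    // owns rows [0, 3) and label 1 owns rows [3, 7); that is exactly the
+    // [label_start_offset, label_end_offset) slice copied and sorted below.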
+    rmm::device_uvector<vertex_t> this_label_sorted_org_edgelist_srcs(
+      label_end_offset - label_start_offset, handle.get_stream());
+    thrust::copy(handle.get_thrust_policy(),
+                 org_edgelist_srcs.begin() + label_start_offset,
+                 org_edgelist_srcs.begin() + label_end_offset,
+                 this_label_sorted_org_edgelist_srcs.begin());
+    rmm::device_uvector<vertex_t> this_label_sorted_org_edgelist_dsts(
+      label_end_offset - label_start_offset, handle.get_stream());
+    thrust::copy(handle.get_thrust_policy(),
+                 org_edgelist_dsts.begin() + label_start_offset,
+                 org_edgelist_dsts.begin() + label_end_offset,
+                 this_label_sorted_org_edgelist_dsts.begin());
+    auto this_label_sorted_org_edgelist_weights =
+      org_edgelist_weights ? std::make_optional<rmm::device_uvector<weight_t>>(
+                               label_end_offset - label_start_offset, handle.get_stream())
+                           : std::nullopt;
+    if (this_label_sorted_org_edgelist_weights) {
+      thrust::copy(handle.get_thrust_policy(),
+                   (*org_edgelist_weights).begin() + label_start_offset,
+                   (*org_edgelist_weights).begin() + label_end_offset,
+                   (*this_label_sorted_org_edgelist_weights).begin());
+    }
+
+    if (this_label_sorted_org_edgelist_weights) {
+      auto sorted_org_edge_first =
+        thrust::make_zip_iterator(this_label_sorted_org_edgelist_srcs.begin(),
+                                  this_label_sorted_org_edgelist_dsts.begin(),
+                                  (*this_label_sorted_org_edgelist_weights).begin());
+      thrust::sort(handle.get_thrust_policy(),
+                   sorted_org_edge_first,
+                   sorted_org_edge_first + this_label_sorted_org_edgelist_srcs.size());
+    } else {
+      auto sorted_org_edge_first = thrust::make_zip_iterator(
+        this_label_sorted_org_edgelist_srcs.begin(), this_label_sorted_org_edgelist_dsts.begin());
+      thrust::sort(handle.get_thrust_policy(),
+                   sorted_org_edge_first,
+                   sorted_org_edge_first + this_label_sorted_org_edgelist_srcs.size());
+    }
+
+    rmm::device_uvector<vertex_t> this_label_sorted_unrenumbered_edgelist_srcs(
+      label_end_offset - label_start_offset, handle.get_stream());
+    thrust::copy(handle.get_thrust_policy(),
+                 renumbered_edgelist_srcs.begin() + label_start_offset,
+                 renumbered_edgelist_srcs.begin() + label_end_offset,
+                 this_label_sorted_unrenumbered_edgelist_srcs.begin());
+    rmm::device_uvector<vertex_t> this_label_sorted_unrenumbered_edgelist_dsts(
+      label_end_offset - label_start_offset, handle.get_stream());
+    thrust::copy(handle.get_thrust_policy(),
+                 renumbered_edgelist_dsts.begin() + label_start_offset,
+                 renumbered_edgelist_dsts.begin() + label_end_offset,
+                 this_label_sorted_unrenumbered_edgelist_dsts.begin());
+    auto this_label_sorted_unrenumbered_edgelist_weights =
+      renumbered_edgelist_weights ?
std::make_optional>( + label_end_offset - label_start_offset, handle.get_stream()) + : std::nullopt; + if (this_label_sorted_unrenumbered_edgelist_weights) { + thrust::copy(handle.get_thrust_policy(), + (*renumbered_edgelist_weights).begin() + label_start_offset, + (*renumbered_edgelist_weights).begin() + label_end_offset, + (*this_label_sorted_unrenumbered_edgelist_weights).begin()); + } + + if (renumber_map) { + size_t renumber_map_label_start_offset{0}; + size_t renumber_map_label_end_offset = (*renumber_map).size(); + if (renumber_map_label_offsets) { + raft::update_host(&renumber_map_label_start_offset, + (*renumber_map_label_offsets).data() + i, + size_t{1}, + handle.get_stream()); + raft::update_host(&renumber_map_label_end_offset, + (*renumber_map_label_offsets).data() + i + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + cugraph::unrenumber_int_vertices( + handle, + this_label_sorted_unrenumbered_edgelist_srcs.data(), + this_label_sorted_unrenumbered_edgelist_srcs.size(), + (*renumber_map).data() + renumber_map_label_start_offset, + std::vector{ + static_cast(renumber_map_label_end_offset - renumber_map_label_start_offset)}); + cugraph::unrenumber_int_vertices( + handle, + this_label_sorted_unrenumbered_edgelist_dsts.data(), + this_label_sorted_unrenumbered_edgelist_dsts.size(), + (*renumber_map).data() + renumber_map_label_start_offset, + std::vector{ + static_cast(renumber_map_label_end_offset - renumber_map_label_start_offset)}); + } + + if (this_label_sorted_unrenumbered_edgelist_weights) { + auto sorted_unrenumbered_edge_first = + thrust::make_zip_iterator(this_label_sorted_unrenumbered_edgelist_srcs.begin(), + this_label_sorted_unrenumbered_edgelist_dsts.begin(), + (*this_label_sorted_unrenumbered_edgelist_weights).begin()); + thrust::sort( + handle.get_thrust_policy(), + sorted_unrenumbered_edge_first, + sorted_unrenumbered_edge_first + this_label_sorted_unrenumbered_edgelist_srcs.size()); + + auto sorted_org_edge_first = + thrust::make_zip_iterator(this_label_sorted_org_edgelist_srcs.begin(), + this_label_sorted_org_edgelist_dsts.begin(), + (*this_label_sorted_org_edgelist_weights).begin()); + if (!thrust::equal(handle.get_thrust_policy(), + sorted_org_edge_first, + sorted_org_edge_first + this_label_sorted_org_edgelist_srcs.size(), + sorted_unrenumbered_edge_first)) { + return false; + } + } else { + auto sorted_unrenumbered_edge_first = + thrust::make_zip_iterator(this_label_sorted_unrenumbered_edgelist_srcs.begin(), + this_label_sorted_unrenumbered_edgelist_dsts.begin()); + thrust::sort( + handle.get_thrust_policy(), + sorted_unrenumbered_edge_first, + sorted_unrenumbered_edge_first + this_label_sorted_unrenumbered_edgelist_srcs.size()); + + auto sorted_org_edge_first = thrust::make_zip_iterator( + this_label_sorted_org_edgelist_srcs.begin(), this_label_sorted_org_edgelist_dsts.begin()); + if (!thrust::equal(handle.get_thrust_policy(), + sorted_org_edge_first, + sorted_org_edge_first + this_label_sorted_org_edgelist_srcs.size(), + sorted_unrenumbered_edge_first)) { + return false; + } + } + } + + return true; +} + +template bool compare_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumber_map, + std::optional> renumber_map_label_offsets, + 
size_t num_labels); + +template bool compare_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumber_map, + std::optional> renumber_map_label_offsets, + size_t num_labels); + +template bool compare_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumber_map, + std::optional> renumber_map_label_offsets, + size_t num_labels); + +template bool compare_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumber_map, + std::optional> renumber_map_label_offsets, + size_t num_labels); + +// unrenumber the renumbered edge list and check whether the original & unrenumbered edge lists +// are identical +template +bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops) +{ + if (org_edgelist_srcs.size() != renumbered_edgelist_srcs.size()) { return false; } + + for (size_t i = 0; i < num_labels; ++i) { + size_t label_start_offset{0}; + size_t label_end_offset = org_edgelist_srcs.size(); + if (org_edgelist_label_offsets) { + raft::update_host(&label_start_offset, + (*org_edgelist_label_offsets).data() + i, + size_t{1}, + handle.get_stream()); + raft::update_host(&label_end_offset, + (*org_edgelist_label_offsets).data() + i + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + if (label_start_offset == label_end_offset) { continue; } + + if (renumbered_edgelist_label_edge_type_hop_offsets) { + size_t renumbered_label_start_offset{0}; + size_t renumbered_label_end_offset{0}; + raft::update_host( + &renumbered_label_start_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + i * num_edge_types * num_hops, + size_t{1}, + handle.get_stream()); + raft::update_host(&renumbered_label_end_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + (i + 1) * num_edge_types * num_hops, + 
size_t{1}, + handle.get_stream()); + handle.sync_stream(); + if (renumbered_label_start_offset != label_start_offset) { return false; } + if (renumbered_label_end_offset != label_end_offset) { return false; } + } + + // sort org edgelist by ((edge_type), (hop), src, dst, (weight), (edge ID)) + + rmm::device_uvector this_label_org_sorted_indices(label_end_offset - label_start_offset, + handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + this_label_org_sorted_indices.begin(), + this_label_org_sorted_indices.end(), + size_t{0}); + + thrust::sort( + handle.get_thrust_policy(), + this_label_org_sorted_indices.begin(), + this_label_org_sorted_indices.end(), + [edge_types = org_edgelist_edge_types + ? thrust::make_optional>( + (*org_edgelist_edge_types).data() + label_start_offset, + label_end_offset - label_start_offset) + : thrust::nullopt, + hops = org_edgelist_hops ? thrust::make_optional>( + (*org_edgelist_hops).data() + label_start_offset, + label_end_offset - label_start_offset) + : thrust::nullopt, + srcs = raft::device_span(org_edgelist_srcs.data() + label_start_offset, + label_end_offset - label_start_offset), + dsts = raft::device_span(org_edgelist_dsts.data() + label_start_offset, + label_end_offset - label_start_offset), + weights = org_edgelist_weights ? thrust::make_optional>( + (*org_edgelist_weights).data() + label_start_offset, + label_end_offset - label_start_offset) + : thrust::nullopt, + edge_ids = org_edgelist_edge_ids ? thrust::make_optional>( + (*org_edgelist_edge_ids).data() + label_start_offset, + label_end_offset - label_start_offset) + : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) { + edge_type_t l_edge_type{0}; + edge_type_t r_edge_type{0}; + if (edge_types) { + l_edge_type = (*edge_types)[l_idx]; + r_edge_type = (*edge_types)[r_idx]; + } + + int32_t l_hop{0}; + int32_t r_hop{0}; + if (hops) { + l_hop = (*hops)[l_idx]; + r_hop = (*hops)[r_idx]; + } + + vertex_t l_src = srcs[l_idx]; + vertex_t r_src = srcs[r_idx]; + + vertex_t l_dst = dsts[l_idx]; + vertex_t r_dst = dsts[r_idx]; + + weight_t l_weight{0.0}; + weight_t r_weight{0.0}; + if (weights) { + l_weight = (*weights)[l_idx]; + r_weight = (*weights)[r_idx]; + } + + edge_id_t l_edge_id{0}; + edge_id_t r_edge_id{0}; + if (edge_ids) { + l_edge_id = (*edge_ids)[l_idx]; + r_edge_id = (*edge_ids)[r_idx]; + } + + return thrust::make_tuple(l_edge_type, l_hop, l_src, l_dst, l_weight, l_edge_id) < + thrust::make_tuple(r_edge_type, r_hop, r_src, r_dst, r_weight, r_edge_id); + }); + + for (size_t j = 0; j < num_edge_types; ++j) { + auto edge_type_start_offset = label_start_offset; + auto edge_type_end_offset = label_end_offset; + if (renumbered_edgelist_label_edge_type_hop_offsets) { + raft::update_host(&edge_type_start_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + i * num_edge_types * num_hops + j * num_hops, + size_t{1}, + handle.get_stream()); + raft::update_host(&edge_type_end_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + i * num_edge_types * num_hops + (j + 1) * num_hops, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + if (edge_type_start_offset == edge_type_end_offset) { continue; } + + if (org_edgelist_edge_types) { + if (static_cast(thrust::count_if( + handle.get_thrust_policy(), + this_label_org_sorted_indices.begin() + (edge_type_start_offset - label_start_offset), + this_label_org_sorted_indices.begin() + (edge_type_end_offset - label_start_offset), + [edge_types = raft::device_span( + 
(*org_edgelist_edge_types).data() + label_start_offset, + label_end_offset - label_start_offset), + edge_type = static_cast(j)] __device__(auto i) { + return edge_types[i] == edge_type; + })) != edge_type_end_offset - edge_type_start_offset) { + return false; + } + } + + if (org_edgelist_hops) { + for (size_t k = 0; k < num_hops; ++k) { + auto hop_start_offset = edge_type_start_offset; + auto hop_end_offset = edge_type_end_offset; + if (renumbered_edgelist_label_edge_type_hop_offsets) { + raft::update_host(&hop_start_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + i * num_edge_types * num_hops + j * num_hops + k, + size_t{1}, + handle.get_stream()); + raft::update_host(&hop_end_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + i * num_edge_types * num_hops + j * num_hops + k + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + if (hop_start_offset == hop_end_offset) { continue; } + + if (static_cast(thrust::count_if( + handle.get_thrust_policy(), + this_label_org_sorted_indices.begin() + (hop_start_offset - label_start_offset), + this_label_org_sorted_indices.begin() + (hop_end_offset - label_start_offset), + [hops = raft::device_span( + (*org_edgelist_hops).data() + label_start_offset, + label_end_offset - label_start_offset), + hop = static_cast(k)] __device__(auto i) { return hops[i] == hop; })) != + hop_end_offset - hop_start_offset) { + return false; + } + } + } + + // unrenumber source vertex IDs + + rmm::device_uvector this_edge_type_unrenumbered_edgelist_srcs( + edge_type_end_offset - edge_type_start_offset, handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumbered_edgelist_srcs.begin() + edge_type_start_offset, + renumbered_edgelist_srcs.begin() + edge_type_end_offset, + this_edge_type_unrenumbered_edgelist_srcs.begin()); + { + vertex_t org_src{}; + raft::update_host(&org_src, + org_edgelist_srcs.data() + label_start_offset + + this_label_org_sorted_indices.element( + edge_type_start_offset - label_start_offset, handle.get_stream()), + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + auto vertex_type = thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(handle.get_thrust_policy(), + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + org_src)); + size_t renumber_map_label_start_offset{}; + size_t renumber_map_label_end_offset{}; + raft::update_host( + &renumber_map_label_start_offset, + vertex_renumber_map_label_type_offsets.data() + i * num_vertex_types + vertex_type, + size_t{1}, + handle.get_stream()); + raft::update_host( + &renumber_map_label_end_offset, + vertex_renumber_map_label_type_offsets.data() + i * num_vertex_types + vertex_type + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + auto renumber_map = raft::device_span( + vertex_renumber_map.data() + renumber_map_label_start_offset, + renumber_map_label_end_offset - renumber_map_label_start_offset); + cugraph::unrenumber_int_vertices( + handle, + this_edge_type_unrenumbered_edgelist_srcs.data(), + edge_type_end_offset - edge_type_start_offset, + renumber_map.data(), + std::vector{static_cast(renumber_map.size())}); + } + + // unrenumber destination vertex IDs + + rmm::device_uvector this_edge_type_unrenumbered_edgelist_dsts( + edge_type_end_offset - edge_type_start_offset, handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumbered_edgelist_dsts.begin() + edge_type_start_offset, + renumbered_edgelist_dsts.begin() + edge_type_end_offset, + 
this_edge_type_unrenumbered_edgelist_dsts.begin()); + { + vertex_t org_dst{}; + raft::update_host(&org_dst, + org_edgelist_dsts.data() + label_start_offset + + this_label_org_sorted_indices.element( + edge_type_start_offset - label_start_offset, handle.get_stream()), + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + auto vertex_type = thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(handle.get_thrust_policy(), + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + org_dst)); + size_t renumber_map_label_start_offset{0}; + size_t renumber_map_label_end_offset{}; + raft::update_host( + &renumber_map_label_start_offset, + vertex_renumber_map_label_type_offsets.data() + i * num_vertex_types + vertex_type, + size_t{1}, + handle.get_stream()); + raft::update_host( + &renumber_map_label_end_offset, + vertex_renumber_map_label_type_offsets.data() + i * num_vertex_types + vertex_type + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + auto renumber_map = raft::device_span( + vertex_renumber_map.data() + renumber_map_label_start_offset, + renumber_map_label_end_offset - renumber_map_label_start_offset); + cugraph::unrenumber_int_vertices( + handle, + this_edge_type_unrenumbered_edgelist_dsts.data(), + edge_type_end_offset - edge_type_start_offset, + renumber_map.data(), + std::vector{static_cast(renumber_map.size())}); + } + + // unrenumber edge IDs + + std::optional> unrenumbered_edgelist_edge_ids{std::nullopt}; + if (renumbered_edgelist_edge_ids) { + unrenumbered_edgelist_edge_ids = rmm::device_uvector( + edge_type_end_offset - edge_type_start_offset, handle.get_stream()); + size_t renumber_map_type_start_offset{0}; + size_t renumber_map_type_end_offset = (*edge_id_renumber_map).size(); + if (edge_id_renumber_map_label_type_offsets) { + raft::update_host(&renumber_map_type_start_offset, + (*edge_id_renumber_map_label_type_offsets).data() + i * num_edge_types + + static_cast(j), + size_t{1}, + handle.get_stream()); + raft::update_host(&renumber_map_type_end_offset, + (*edge_id_renumber_map_label_type_offsets).data() + i * num_edge_types + + static_cast(j) + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + auto renumber_map = raft::device_span( + (*edge_id_renumber_map).data() + renumber_map_type_start_offset, + renumber_map_type_end_offset - renumber_map_type_start_offset); + thrust::gather(handle.get_thrust_policy(), + (*renumbered_edgelist_edge_ids).begin() + edge_type_start_offset, + (*renumbered_edgelist_edge_ids).begin() + edge_type_end_offset, + renumber_map.begin(), + (*unrenumbered_edgelist_edge_ids).begin()); + } + + // sort sorted & renumbered edgelist by (src, dst, (weight), (edge ID)) + + rmm::device_uvector this_edge_type_unrenumbered_sorted_indices( + edge_type_end_offset - edge_type_start_offset, handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + this_edge_type_unrenumbered_sorted_indices.begin(), + this_edge_type_unrenumbered_sorted_indices.end(), + size_t{0}); + + for (size_t k = 0; k < num_hops; ++k) { + auto hop_start_offset = edge_type_start_offset; + auto hop_end_offset = edge_type_end_offset; + if (renumbered_edgelist_label_edge_type_hop_offsets) { + raft::update_host(&hop_start_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + i * num_edge_types * num_hops + j * num_hops + k, + size_t{1}, + handle.get_stream()); + raft::update_host(&hop_end_offset, + (*renumbered_edgelist_label_edge_type_hop_offsets).data() + + i * num_edge_types * num_hops + j * 
num_hops + k + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + if (hop_start_offset == hop_end_offset) { continue; } + + thrust::sort( + handle.get_thrust_policy(), + this_edge_type_unrenumbered_sorted_indices.begin() + + (hop_start_offset - edge_type_start_offset), + this_edge_type_unrenumbered_sorted_indices.begin() + + (hop_end_offset - edge_type_start_offset), + [srcs = + raft::device_span(this_edge_type_unrenumbered_edgelist_srcs.data(), + this_edge_type_unrenumbered_edgelist_srcs.size()), + dsts = + raft::device_span(this_edge_type_unrenumbered_edgelist_dsts.data(), + this_edge_type_unrenumbered_edgelist_dsts.size()), + weights = renumbered_edgelist_weights + ? thrust::make_optional>( + (*renumbered_edgelist_weights).data() + edge_type_start_offset, + edge_type_end_offset - edge_type_start_offset) + : thrust::nullopt, + edge_ids = renumbered_edgelist_edge_ids + ? thrust::make_optional>( + (*renumbered_edgelist_edge_ids).data() + edge_type_start_offset, + edge_type_end_offset - edge_type_start_offset) + : thrust::nullopt] __device__(size_t l_idx, size_t r_idx) { + vertex_t l_src = srcs[l_idx]; + vertex_t r_src = srcs[r_idx]; + + vertex_t l_dst = dsts[l_idx]; + vertex_t r_dst = dsts[r_idx]; + + weight_t l_weight{0.0}; + weight_t r_weight{0.0}; + if (weights) { + l_weight = (*weights)[l_idx]; + r_weight = (*weights)[r_idx]; + } + + edge_id_t l_edge_id{0}; + edge_id_t r_edge_id{0}; + if (edge_ids) { + l_edge_id = (*edge_ids)[l_idx]; + r_edge_id = (*edge_ids)[r_idx]; + } + + return thrust::make_tuple(l_src, l_dst, l_weight, l_edge_id) < + thrust::make_tuple(r_src, r_dst, r_weight, r_edge_id); + }); + } + + // compare + + if (!thrust::equal( + handle.get_thrust_policy(), + this_label_org_sorted_indices.begin() + (edge_type_start_offset - label_start_offset), + this_label_org_sorted_indices.begin() + (edge_type_end_offset - label_start_offset), + this_edge_type_unrenumbered_sorted_indices.begin(), + [org_srcs = + raft::device_span(org_edgelist_srcs.data() + label_start_offset, + label_end_offset - label_start_offset), + org_dsts = + raft::device_span(org_edgelist_dsts.data() + label_start_offset, + label_end_offset - label_start_offset), + org_weights = org_edgelist_weights + ? thrust::make_optional>( + (*org_edgelist_weights).data() + label_start_offset, + label_end_offset - label_start_offset) + : thrust::nullopt, + org_edge_ids = org_edgelist_edge_ids + ? thrust::make_optional>( + (*org_edgelist_edge_ids).data() + label_start_offset, + label_end_offset - label_start_offset) + : thrust::nullopt, + unrenumbered_srcs = + raft::device_span(this_edge_type_unrenumbered_edgelist_srcs.data(), + this_edge_type_unrenumbered_edgelist_srcs.size()), + unrenumbered_dsts = + raft::device_span(this_edge_type_unrenumbered_edgelist_dsts.data(), + this_edge_type_unrenumbered_edgelist_dsts.size()), + unrenumbered_weights = + renumbered_edgelist_weights + ? thrust::make_optional>( + (*renumbered_edgelist_weights).data() + edge_type_start_offset, + edge_type_end_offset - edge_type_start_offset) + : thrust::nullopt, + unrenumbered_edge_ids = + unrenumbered_edgelist_edge_ids + ? 
thrust::make_optional>( + (*unrenumbered_edgelist_edge_ids).data(), + (*unrenumbered_edgelist_edge_ids).size()) + : thrust:: + nullopt] __device__(size_t org_idx /* from label_start_offset */, + size_t + unrenumbered_idx /* from edge_type_start_offset */) { + auto org_src = org_srcs[org_idx]; + auto unrenumbered_src = unrenumbered_srcs[unrenumbered_idx]; + if (org_src != unrenumbered_src) { return false; } + + auto org_dst = org_dsts[org_idx]; + auto unrenumbered_dst = unrenumbered_dsts[unrenumbered_idx]; + if (org_dst != unrenumbered_dst) { return false; } + + weight_t org_weight{0.0}; + if (org_weights) { org_weight = (*org_weights)[org_idx]; } + weight_t unrenumbered_weight{0.0}; + if (unrenumbered_weights) { + unrenumbered_weight = (*unrenumbered_weights)[unrenumbered_idx]; + } + if (org_weight != unrenumbered_weight) { return false; } + + edge_id_t org_edge_id{0}; + if (org_edge_ids) { org_edge_id = (*org_edge_ids)[org_idx]; } + edge_id_t unrenumbered_edge_id{0}; + if (unrenumbered_edge_ids) { + unrenumbered_edge_id = (*unrenumbered_edge_ids)[unrenumbered_idx]; + } + + return org_edge_id == unrenumbered_edge_id; + })) { + return false; + } + } + } + + return true; +} + +template bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> 
renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template +bool check_vertex_renumber_map_invariants( + raft::handle_t const& handle, + std::optional> starting_vertices, + std::optional> starting_vertex_label_offsets, + raft::device_span org_edgelist_srcs, + raft::device_span 
org_edgelist_dsts, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + std::optional> vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + bool src_is_major) +{ + // Check the invariants in renumber_map. + // Say we found the minimum (primary key: hop, secondary key: flag) pair for every unique vertex, + // where the flag is 0 for majors and 1 for minors. Then, vertices with smaller (hop, flag) + // pairs should be renumbered to smaller numbers than vertices with larger (hop, flag) pairs. + auto org_edgelist_majors = src_is_major ? org_edgelist_srcs : org_edgelist_dsts; + auto org_edgelist_minors = src_is_major ? org_edgelist_dsts : org_edgelist_srcs; + + for (size_t i = 0; i < num_labels; ++i) { + size_t label_start_offset{0}; + auto label_end_offset = org_edgelist_majors.size(); + if (org_edgelist_label_offsets) { + raft::update_host(&label_start_offset, + (*org_edgelist_label_offsets).data() + i, + size_t{1}, + handle.get_stream()); + raft::update_host(&label_end_offset, + (*org_edgelist_label_offsets).data() + i + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + if (label_start_offset == label_end_offset) { continue; } + + // compute (unique major, min_hop) pairs + + rmm::device_uvector this_label_unique_majors(label_end_offset - label_start_offset, + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_majors.begin() + label_start_offset, + org_edgelist_majors.begin() + label_end_offset, + this_label_unique_majors.begin()); + if (starting_vertices) { + size_t starting_vertex_label_start_offset{0}; + auto starting_vertex_label_end_offset = (*starting_vertices).size(); + if (starting_vertex_label_offsets) { + raft::update_host(&starting_vertex_label_start_offset, + (*starting_vertex_label_offsets).data() + i, + size_t{1}, + handle.get_stream()); + raft::update_host(&starting_vertex_label_end_offset, + (*starting_vertex_label_offsets).data() + i + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + auto old_size = this_label_unique_majors.size(); + this_label_unique_majors.resize( + old_size + starting_vertex_label_end_offset - starting_vertex_label_start_offset, + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*starting_vertices).begin() + starting_vertex_label_start_offset, + (*starting_vertices).begin() + starting_vertex_label_end_offset, + this_label_unique_majors.begin() + old_size); + } + + std::optional> this_label_unique_major_hops = + org_edgelist_hops ?
std::make_optional>( + label_end_offset - label_start_offset, handle.get_stream()) + : std::nullopt; + if (org_edgelist_hops) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_hops).begin() + label_start_offset, + (*org_edgelist_hops).begin() + label_end_offset, + (*this_label_unique_major_hops).begin()); + if (starting_vertices) { + auto old_size = (*this_label_unique_major_hops).size(); + (*this_label_unique_major_hops) + .resize(this_label_unique_majors.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + (*this_label_unique_major_hops).begin() + old_size, + (*this_label_unique_major_hops).end(), + int32_t{0}); + } + + auto pair_first = thrust::make_zip_iterator(this_label_unique_majors.begin(), + (*this_label_unique_major_hops).begin()); + thrust::sort( + handle.get_thrust_policy(), pair_first, pair_first + this_label_unique_majors.size()); + this_label_unique_majors.resize(thrust::distance(this_label_unique_majors.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + this_label_unique_majors.begin(), + this_label_unique_majors.end(), + (*this_label_unique_major_hops).begin()))), + handle.get_stream()); + (*this_label_unique_major_hops).resize(this_label_unique_majors.size(), handle.get_stream()); + } else { + thrust::sort(handle.get_thrust_policy(), + this_label_unique_majors.begin(), + this_label_unique_majors.end()); + this_label_unique_majors.resize( + thrust::distance(this_label_unique_majors.begin(), + thrust::unique(handle.get_thrust_policy(), + this_label_unique_majors.begin(), + this_label_unique_majors.end())), + handle.get_stream()); + } + + // compute (unique minor, min_hop) pairs + + rmm::device_uvector this_label_unique_minors(label_end_offset - label_start_offset, + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_minors.begin() + label_start_offset, + org_edgelist_minors.begin() + label_end_offset, + this_label_unique_minors.begin()); + std::optional> this_label_unique_minor_hops = + org_edgelist_hops ? 
std::make_optional>( + label_end_offset - label_start_offset, handle.get_stream()) + : std::nullopt; + if (org_edgelist_hops) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_hops).begin() + label_start_offset, + (*org_edgelist_hops).begin() + label_end_offset, + (*this_label_unique_minor_hops).begin()); + + auto pair_first = thrust::make_zip_iterator(this_label_unique_minors.begin(), + (*this_label_unique_minor_hops).begin()); + thrust::sort( + handle.get_thrust_policy(), pair_first, pair_first + this_label_unique_minors.size()); + this_label_unique_minors.resize(thrust::distance(this_label_unique_minors.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + this_label_unique_minors.begin(), + this_label_unique_minors.end(), + (*this_label_unique_minor_hops).begin()))), + handle.get_stream()); + (*this_label_unique_minor_hops).resize(this_label_unique_minors.size(), handle.get_stream()); + } else { + thrust::sort(handle.get_thrust_policy(), + this_label_unique_minors.begin(), + this_label_unique_minors.end()); + this_label_unique_minors.resize( + thrust::distance(this_label_unique_minors.begin(), + thrust::unique(handle.get_thrust_policy(), + this_label_unique_minors.begin(), + this_label_unique_minors.end())), + handle.get_stream()); + } + + for (size_t j = 0; j < num_vertex_types; ++j) { + size_t renumber_map_type_start_offset{0}; + auto renumber_map_type_end_offset = renumber_map.size(); + if (renumber_map_label_type_offsets) { + raft::update_host(&renumber_map_type_start_offset, + (*renumber_map_label_type_offsets).data() + i * num_vertex_types + j, + size_t{1}, + handle.get_stream()); + raft::update_host(&renumber_map_type_end_offset, + (*renumber_map_label_type_offsets).data() + i * num_vertex_types + j + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + rmm::device_uvector this_type_sorted_org_vertices( + renumber_map_type_end_offset - renumber_map_type_start_offset, handle.get_stream()); + rmm::device_uvector this_type_matching_renumbered_vertices( + this_type_sorted_org_vertices.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumber_map.begin() + renumber_map_type_start_offset, + renumber_map.begin() + renumber_map_type_end_offset, + this_type_sorted_org_vertices.begin()); + thrust::sequence(handle.get_thrust_policy(), + this_type_matching_renumbered_vertices.begin(), + this_type_matching_renumbered_vertices.end(), + vertex_t{0}); + thrust::sort_by_key(handle.get_thrust_policy(), + this_type_sorted_org_vertices.begin(), + this_type_sorted_org_vertices.end(), + this_type_matching_renumbered_vertices.begin()); + + rmm::device_uvector this_type_unique_majors(this_label_unique_majors.size(), + handle.get_stream()); + auto this_type_unique_major_hops = + this_label_unique_major_hops + ? std::make_optional>((*this_label_unique_major_hops).size(), + handle.get_stream()) + : std::nullopt; + rmm::device_uvector this_type_unique_minors(this_label_unique_minors.size(), + handle.get_stream()); + auto this_type_unique_minor_hops = + this_label_unique_minor_hops + ? 
std::make_optional>((*this_label_unique_minor_hops).size(), + handle.get_stream()) + : std::nullopt; + + if (org_edgelist_hops) { + if (vertex_type_offsets) { + auto input_pair_first = thrust::make_zip_iterator( + this_label_unique_majors.begin(), (*this_label_unique_major_hops).begin()); + auto output_pair_first = thrust::make_zip_iterator( + this_type_unique_majors.begin(), (*this_type_unique_major_hops).begin()); + this_type_unique_majors.resize( + thrust::distance( + output_pair_first, + thrust::copy_if(handle.get_thrust_policy(), + input_pair_first, + input_pair_first + this_label_unique_majors.size(), + output_pair_first, + [vertex_type_offsets = *vertex_type_offsets, + vertex_type = j] __device__(auto pair) { + auto type_idx = thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<0>(pair))); + return static_cast(thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<0>(pair)))) == vertex_type; + })), + handle.get_stream()); + (*this_type_unique_major_hops) + .resize(this_type_unique_majors.size(), handle.get_stream()); + + input_pair_first = thrust::make_zip_iterator(this_label_unique_minors.begin(), + (*this_label_unique_minor_hops).begin()); + output_pair_first = thrust::make_zip_iterator(this_type_unique_minors.begin(), + (*this_type_unique_minor_hops).begin()); + this_type_unique_minors.resize( + thrust::distance( + output_pair_first, + thrust::copy_if(handle.get_thrust_policy(), + input_pair_first, + input_pair_first + this_label_unique_minors.size(), + output_pair_first, + [vertex_type_offsets = *vertex_type_offsets, + vertex_type = j] __device__(auto pair) { + return static_cast(thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + thrust::get<0>(pair)))) == vertex_type; + })), + handle.get_stream()); + (*this_type_unique_minor_hops) + .resize(this_type_unique_minors.size(), handle.get_stream()); + } else { + auto input_pair_first = thrust::make_zip_iterator( + this_label_unique_majors.begin(), (*this_label_unique_major_hops).begin()); + thrust::copy(handle.get_thrust_policy(), + input_pair_first, + input_pair_first + this_label_unique_majors.size(), + thrust::make_zip_iterator(this_type_unique_majors.begin(), + (*this_type_unique_major_hops).begin())); + input_pair_first = thrust::make_zip_iterator(this_label_unique_minors.begin(), + (*this_label_unique_minor_hops).begin()); + thrust::copy(handle.get_thrust_policy(), + input_pair_first, + input_pair_first + this_label_unique_minors.size(), + thrust::make_zip_iterator(this_type_unique_minors.begin(), + (*this_type_unique_minor_hops).begin())); + } + + if (this_type_unique_majors.size() + this_type_unique_minors.size() == 0) { continue; } + + rmm::device_uvector merged_vertices( + this_type_unique_majors.size() + this_type_unique_minors.size(), handle.get_stream()); + rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); + rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); + + auto major_triplet_first = + thrust::make_zip_iterator(this_type_unique_majors.begin(), + (*this_type_unique_major_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto minor_triplet_first = + thrust::make_zip_iterator(this_type_unique_minors.begin(), + 
(*this_type_unique_minor_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + major_triplet_first, + major_triplet_first + this_type_unique_majors.size(), + minor_triplet_first, + minor_triplet_first + this_type_unique_minors.size(), + thrust::make_zip_iterator( + merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); + merged_vertices.resize( + thrust::distance( + merged_vertices.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_hops.resize(merged_vertices.size(), handle.get_stream()); + merged_flags.resize(merged_vertices.size(), handle.get_stream()); + + if ((renumber_map_type_end_offset - renumber_map_type_start_offset) != + merged_vertices.size()) { // renumber map size == # unique vertices + return false; + } + + auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + merged_vertices.begin()); + + auto num_unique_keys = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(merged_hops.size()), + cugraph::detail::is_first_in_run_t{sort_key_first}); + rmm::device_uvector min_vertices(num_unique_keys, handle.get_stream()); + rmm::device_uvector max_vertices(num_unique_keys, handle.get_stream()); + + auto renumbered_merged_vertex_first = thrust::make_transform_iterator( + merged_vertices.begin(), + cuda::proclaim_return_type( + [this_type_sorted_org_vertices = raft::device_span( + this_type_sorted_org_vertices.data(), this_type_sorted_org_vertices.size()), + this_type_matching_renumbered_vertices = raft::device_span( + this_type_matching_renumbered_vertices.data(), + this_type_matching_renumbered_vertices.size())] __device__(vertex_t major) { + auto it = thrust::lower_bound(thrust::seq, + this_type_sorted_org_vertices.begin(), + this_type_sorted_org_vertices.end(), + major); + return this_type_matching_renumbered_vertices[thrust::distance( + this_type_sorted_org_vertices.begin(), it)]; + })); + + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + renumbered_merged_vertex_first, + thrust::make_discard_iterator(), + min_vertices.begin(), + thrust::equal_to>{}, + thrust::minimum{}); + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + renumbered_merged_vertex_first, + thrust::make_discard_iterator(), + max_vertices.begin(), + thrust::equal_to>{}, + thrust::maximum{}); + + auto num_violations = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{1}), + thrust::make_counting_iterator(min_vertices.size()), + [min_vertices = raft::device_span(min_vertices.data(), + min_vertices.size()), + max_vertices = raft::device_span( + max_vertices.data(), max_vertices.size())] __device__(size_t i) { + return min_vertices[i] <= max_vertices[i - 1]; + }); + + if (num_violations != 0) { return false; } + } else { + if (vertex_type_offsets) { + this_type_unique_majors.resize( + thrust::distance( + this_type_unique_majors.begin(), + thrust::copy_if( + handle.get_thrust_policy(), + this_label_unique_majors.begin(), + this_label_unique_majors.end(), + this_type_unique_majors.begin(), + 
[vertex_type_offsets = *vertex_type_offsets, vertex_type = j] __device__(auto v) { + auto type_idx = thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, vertex_type_offsets.begin() + 1, vertex_type_offsets.end(), v)); + return static_cast( + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + v))) == vertex_type; + })), + handle.get_stream()); + + this_type_unique_minors.resize( + thrust::distance( + this_type_unique_minors.begin(), + thrust::copy_if( + handle.get_thrust_policy(), + this_label_unique_minors.begin(), + this_label_unique_minors.end(), + this_type_unique_minors.begin(), + [vertex_type_offsets = *vertex_type_offsets, vertex_type = j] __device__(auto v) { + return static_cast( + thrust::distance(vertex_type_offsets.begin() + 1, + thrust::upper_bound(thrust::seq, + vertex_type_offsets.begin() + 1, + vertex_type_offsets.end(), + v))) == vertex_type; + })), + handle.get_stream()); + (*this_type_unique_minor_hops) + .resize(this_type_unique_minors.size(), handle.get_stream()); + } else { + thrust::copy(handle.get_thrust_policy(), + this_label_unique_majors.begin(), + this_label_unique_majors.end(), + this_type_unique_majors.begin()); + thrust::copy(handle.get_thrust_policy(), + this_label_unique_minors.begin(), + this_label_unique_minors.end(), + this_type_unique_minors.begin()); + } + + this_type_unique_minors.resize( + thrust::distance( + this_type_unique_minors.begin(), + thrust::remove_if(handle.get_thrust_policy(), + this_type_unique_minors.begin(), + this_type_unique_minors.end(), + [sorted_unique_majors = raft::device_span( + this_type_unique_majors.data(), + this_type_unique_majors.size())] __device__(auto minor) { + return thrust::binary_search(thrust::seq, + sorted_unique_majors.begin(), + sorted_unique_majors.end(), + minor); + })), + handle.get_stream()); + + if ((renumber_map_type_end_offset - renumber_map_type_start_offset) != + (this_type_unique_majors.size() + + this_type_unique_minors.size())) { // renumber map size == # unique vertices + return false; + } + + auto max_major_renumbered_vertex = thrust::transform_reduce( + handle.get_thrust_policy(), + this_type_unique_majors.begin(), + this_type_unique_majors.end(), + cuda::proclaim_return_type( + [this_type_sorted_org_vertices = raft::device_span( + this_type_sorted_org_vertices.data(), this_type_sorted_org_vertices.size()), + this_type_matching_renumbered_vertices = raft::device_span( + this_type_matching_renumbered_vertices.data(), + this_type_matching_renumbered_vertices.size())] __device__(vertex_t major) + -> vertex_t { + auto it = thrust::lower_bound(thrust::seq, + this_type_sorted_org_vertices.begin(), + this_type_sorted_org_vertices.end(), + major); + return this_type_matching_renumbered_vertices[thrust::distance( + this_type_sorted_org_vertices.begin(), it)]; + }), + std::numeric_limits::lowest(), + thrust::maximum{}); + + auto min_minor_renumbered_vertex = thrust::transform_reduce( + handle.get_thrust_policy(), + this_type_unique_minors.begin(), + this_type_unique_minors.end(), + cuda::proclaim_return_type( + [this_type_sorted_org_vertices = raft::device_span( + this_type_sorted_org_vertices.data(), this_type_sorted_org_vertices.size()), + this_type_matching_renumbered_vertices = raft::device_span( + this_type_matching_renumbered_vertices.data(), + this_type_matching_renumbered_vertices.size())] __device__(vertex_t minor) + -> vertex_t { + auto it = 
thrust::lower_bound(thrust::seq, + this_type_sorted_org_vertices.begin(), + this_type_sorted_org_vertices.end(), + minor); + return this_type_matching_renumbered_vertices[thrust::distance( + this_type_sorted_org_vertices.begin(), it)]; + }), + std::numeric_limits::max(), + thrust::minimum{}); + + if (max_major_renumbered_vertex >= min_minor_renumbered_vertex) { return false; } + } + } + + return true; +} + +template bool check_vertex_renumber_map_invariants( + raft::handle_t const& handle, + std::optional> starting_vertices, + std::optional> starting_vertex_label_offsets, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + std::optional> vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + bool src_is_major); + +template bool check_vertex_renumber_map_invariants( + raft::handle_t const& handle, + std::optional> starting_vertices, + std::optional> starting_vertex_label_offsets, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + std::optional> vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + bool src_is_major); + +template +bool check_edge_id_renumber_map_invariants( + raft::handle_t const& handle, + raft::device_span org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + size_t num_labels, + size_t num_edge_types) +{ + // Check the invariants in renumber_map. + // Say we found the minimum hop for every unique (edge type, edge ID) key. Then, within each + // edge type, edge IDs with smaller minimum hops should be renumbered to smaller numbers than + // edge IDs with larger minimum hops. + + for (size_t i = 0; i < num_labels; ++i) { + size_t label_start_offset{0}; + auto label_end_offset = org_edgelist_edge_ids.size(); + if (org_edgelist_label_offsets) { + raft::update_host(&label_start_offset, + (*org_edgelist_label_offsets).data() + i, + size_t{1}, + handle.get_stream()); + raft::update_host(&label_end_offset, + (*org_edgelist_label_offsets).data() + i + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + if (label_start_offset == label_end_offset) { continue; } + + // compute unique key (edge type, edge ID), value (min. hop) pairs + + std::optional> this_label_unique_key_edge_types = + org_edgelist_edge_types ?
std::make_optional>( + label_end_offset - label_start_offset, handle.get_stream()) + : std::nullopt; + if (org_edgelist_edge_types) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_edge_types).begin() + label_start_offset, + (*org_edgelist_edge_types).begin() + label_end_offset, + (*this_label_unique_key_edge_types).begin()); + } + + rmm::device_uvector this_label_unique_key_edge_ids( + label_end_offset - label_start_offset, handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_edge_ids.begin() + label_start_offset, + org_edgelist_edge_ids.begin() + label_end_offset, + this_label_unique_key_edge_ids.begin()); + + std::optional> this_label_unique_key_hops = + org_edgelist_hops ? std::make_optional>( + label_end_offset - label_start_offset, handle.get_stream()) + : std::nullopt; + if (org_edgelist_hops) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_hops).begin() + label_start_offset, + (*org_edgelist_hops).begin() + label_end_offset, + (*this_label_unique_key_hops).begin()); + } + + if (org_edgelist_edge_types) { + if (org_edgelist_hops) { + auto triplet_first = thrust::make_zip_iterator((*this_label_unique_key_edge_types).begin(), + this_label_unique_key_edge_ids.begin(), + (*this_label_unique_key_hops).begin()); + thrust::sort(handle.get_thrust_policy(), + triplet_first, + triplet_first + this_label_unique_key_edge_ids.size()); + auto key_first = thrust::make_zip_iterator((*this_label_unique_key_edge_types).begin(), + this_label_unique_key_edge_ids.begin()); + this_label_unique_key_edge_ids.resize( + thrust::distance( + key_first, + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + key_first, + key_first + this_label_unique_key_edge_ids.size(), + (*this_label_unique_key_hops).begin()))), + handle.get_stream()); + (*this_label_unique_key_edge_types) + .resize(this_label_unique_key_edge_ids.size(), handle.get_stream()); + (*this_label_unique_key_hops) + .resize(this_label_unique_key_edge_ids.size(), handle.get_stream()); + } else { + auto pair_first = thrust::make_zip_iterator((*this_label_unique_key_edge_types).begin(), + this_label_unique_key_edge_ids.begin()); + thrust::sort(handle.get_thrust_policy(), + pair_first, + pair_first + this_label_unique_key_edge_ids.size()); + this_label_unique_key_edge_ids.resize( + thrust::distance(pair_first, + thrust::unique(handle.get_thrust_policy(), + pair_first, + pair_first + this_label_unique_key_edge_ids.size())), + handle.get_stream()); + (*this_label_unique_key_edge_types) + .resize(this_label_unique_key_edge_ids.size(), handle.get_stream()); + } + } else { + if (org_edgelist_hops) { + auto pair_first = thrust::make_zip_iterator(this_label_unique_key_edge_ids.begin(), + (*this_label_unique_key_hops).begin()); + thrust::sort(handle.get_thrust_policy(), + pair_first, + pair_first + this_label_unique_key_edge_ids.size()); + this_label_unique_key_edge_ids.resize( + thrust::distance( + this_label_unique_key_edge_ids.begin(), + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + this_label_unique_key_edge_ids.begin(), + this_label_unique_key_edge_ids.end(), + (*this_label_unique_key_hops).begin()))), + handle.get_stream()); + (*this_label_unique_key_hops) + .resize(this_label_unique_key_edge_ids.size(), handle.get_stream()); + } else { + thrust::sort(handle.get_thrust_policy(), + this_label_unique_key_edge_ids.begin(), + this_label_unique_key_edge_ids.end()); + this_label_unique_key_edge_ids.resize( + thrust::distance(this_label_unique_key_edge_ids.begin(), + 
thrust::unique(handle.get_thrust_policy(), + this_label_unique_key_edge_ids.begin(), + this_label_unique_key_edge_ids.end())), + handle.get_stream()); + } + } + + for (size_t j = 0; j < num_edge_types; ++j) { + size_t renumber_map_type_start_offset{0}; + auto renumber_map_type_end_offset = renumber_map.size(); + if (renumber_map_label_type_offsets) { + raft::update_host(&renumber_map_type_start_offset, + (*renumber_map_label_type_offsets).data() + i * num_edge_types + j, + size_t{1}, + handle.get_stream()); + raft::update_host(&renumber_map_type_end_offset, + (*renumber_map_label_type_offsets).data() + i * num_edge_types + j + 1, + size_t{1}, + handle.get_stream()); + handle.sync_stream(); + } + + rmm::device_uvector this_type_sorted_org_edge_ids( + renumber_map_type_end_offset - renumber_map_type_start_offset, handle.get_stream()); + rmm::device_uvector this_type_matching_renumbered_edge_ids( + this_type_sorted_org_edge_ids.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumber_map.begin() + renumber_map_type_start_offset, + renumber_map.begin() + renumber_map_type_end_offset, + this_type_sorted_org_edge_ids.begin()); + thrust::sequence(handle.get_thrust_policy(), + this_type_matching_renumbered_edge_ids.begin(), + this_type_matching_renumbered_edge_ids.end(), + edge_id_t{0}); + thrust::sort_by_key(handle.get_thrust_policy(), + this_type_sorted_org_edge_ids.begin(), + this_type_sorted_org_edge_ids.end(), + this_type_matching_renumbered_edge_ids.begin()); + + size_t type_start_offset{0}; + auto type_end_offset = this_label_unique_key_edge_ids.size(); + if (this_label_unique_key_edge_types) { + type_start_offset = static_cast( + thrust::distance((*this_label_unique_key_edge_types).begin(), + thrust::lower_bound(handle.get_thrust_policy(), + (*this_label_unique_key_edge_types).begin(), + (*this_label_unique_key_edge_types).end(), + static_cast(j)))); + type_end_offset = static_cast( + thrust::distance((*this_label_unique_key_edge_types).begin(), + thrust::upper_bound(handle.get_thrust_policy(), + (*this_label_unique_key_edge_types).begin(), + (*this_label_unique_key_edge_types).end(), + static_cast(j)))); + } + + if ((renumber_map_type_end_offset - renumber_map_type_start_offset) != + (type_end_offset - type_start_offset)) { // renumber map size == # unique edge IDs + return false; + } + + if (org_edgelist_hops) { + if (type_start_offset == type_end_offset) { continue; } + + auto sort_key_first = (*this_label_unique_key_hops).begin(); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first + type_start_offset, + sort_key_first + type_end_offset, + this_label_unique_key_edge_ids.begin() + type_start_offset); + + auto num_unique_keys = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(type_end_offset - type_start_offset), + cugraph::detail::is_first_in_run_t{ + sort_key_first + type_start_offset}); + rmm::device_uvector min_edge_ids(num_unique_keys, handle.get_stream()); + rmm::device_uvector max_edge_ids(num_unique_keys, handle.get_stream()); + + auto renumbered_edge_id_first = thrust::make_transform_iterator( + this_label_unique_key_edge_ids.begin(), + cuda::proclaim_return_type( + [this_type_sorted_org_edge_ids = raft::device_span( + this_type_sorted_org_edge_ids.data(), this_type_sorted_org_edge_ids.size()), + this_type_matching_renumbered_edge_ids = raft::device_span( + this_type_matching_renumbered_edge_ids.data(), + this_type_matching_renumbered_edge_ids.size())] 
__device__(edge_id_t id) { + auto it = thrust::lower_bound(thrust::seq, + this_type_sorted_org_edge_ids.begin(), + this_type_sorted_org_edge_ids.end(), + id); + return this_type_matching_renumbered_edge_ids[thrust::distance( + this_type_sorted_org_edge_ids.begin(), it)]; + })); + + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first + type_start_offset, + sort_key_first + type_end_offset, + renumbered_edge_id_first + type_start_offset, + thrust::make_discard_iterator(), + min_edge_ids.begin(), + thrust::equal_to{}, + thrust::minimum{}); + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first + type_start_offset, + sort_key_first + type_end_offset, + renumbered_edge_id_first + type_start_offset, + thrust::make_discard_iterator(), + max_edge_ids.begin(), + thrust::equal_to{}, + thrust::maximum{}); + + auto num_violations = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{1}), + thrust::make_counting_iterator(min_edge_ids.size()), + [min_edge_ids = raft::device_span(min_edge_ids.data(), + min_edge_ids.size()), + max_edge_ids = raft::device_span( + max_edge_ids.data(), max_edge_ids.size())] __device__(size_t i) { + return min_edge_ids[i] <= max_edge_ids[i - 1]; + }); + + if (num_violations != 0) { return false; } + } + } + } + + return true; +} + +template bool check_edge_id_renumber_map_invariants( + raft::handle_t const& handle, + raft::device_span org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + size_t num_labels, + size_t num_edge_types); + +template bool check_edge_id_renumber_map_invariants( + raft::handle_t const& handle, + raft::device_span org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + size_t num_labels, + size_t num_edge_types); diff --git a/cpp/tests/sampling/detail/sampling_post_processing_validate.hpp b/cpp/tests/sampling/detail/sampling_post_processing_validate.hpp new file mode 100644 index 00000000000..986265b368f --- /dev/null +++ b/cpp/tests/sampling/detail/sampling_post_processing_validate.hpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
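Both renumber-map invariant checkers above boil down to one rule: keys (vertices or edge IDs) first seen at an earlier hop must be assigned strictly smaller renumbered IDs than keys first seen at a later hop. A minimal host-side sketch of that rule, assuming per-key minimum hops and renumbered IDs have already been gathered (all names here are illustrative, not part of this diff):

#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

// Illustrative only: verify that renumbered IDs of keys with smaller min. hops never overlap
// the ID range of keys with larger min. hops. Hops are assumed non-negative, and min_hops[i]
// and renumbered[i] describe the same unique key.
bool hop_ordering_holds(std::vector<int32_t> const& min_hops,
                        std::vector<int64_t> const& renumbered)
{
  if (min_hops.empty()) { return true; }
  auto max_hop = *std::max_element(min_hops.begin(), min_hops.end());
  std::vector<int64_t> grp_min(max_hop + 1, std::numeric_limits<int64_t>::max());
  std::vector<int64_t> grp_max(max_hop + 1, std::numeric_limits<int64_t>::lowest());
  for (std::size_t i = 0; i < min_hops.size(); ++i) {
    grp_min[min_hops[i]] = std::min(grp_min[min_hops[i]], renumbered[i]);
    grp_max[min_hops[i]] = std::max(grp_max[min_hops[i]], renumbered[i]);
  }
  auto prev_max = std::numeric_limits<int64_t>::lowest();
  for (int32_t h = 0; h <= max_hop; ++h) {
    if (grp_max[h] == std::numeric_limits<int64_t>::lowest()) { continue; }  // no keys at hop h
    if (grp_min[h] <= prev_max) { return false; }  // ID ranges overlap: violation
    prev_max = grp_max[h];
  }
  return true;
}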
+ */ + +#include +#include + +#include + +template +bool check_offsets(raft::handle_t const& handle, + raft::device_span offsets, + index_t num_segments, + index_t num_elements); + +template +bool check_edgelist_is_sorted(raft::handle_t const& handle, + raft::device_span edgelist_majors, + raft::device_span edgelist_minors); + +// unrenumber the renumbered edge list and check whether the original & unrenumbered edge lists are +// identical +template +bool compare_edgelist(raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumber_map, + std::optional> renumber_map_label_offsets, + size_t num_labels); + +// unrenumber the renumbered edge list and check whether the original & unrenumbered edge lists +// are identical +template +bool compare_heterogeneous_edgelist( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + std::optional> org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumbered_edgelist_edge_ids, + std::optional> renumbered_edgelist_label_edge_type_hop_offsets, + raft::device_span vertex_renumber_map, + raft::device_span vertex_renumber_map_label_type_offsets, + std::optional> edge_id_renumber_map, + std::optional> edge_id_renumber_map_label_type_offsets, + raft::device_span vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + size_t num_edge_types, + size_t num_hops); + +template +bool check_vertex_renumber_map_invariants( + raft::handle_t const& handle, + std::optional> starting_vertices, + std::optional> starting_vertex_label_offsets, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + std::optional> vertex_type_offsets, + size_t num_labels, + size_t num_vertex_types, + bool src_is_major); + +template +bool check_edge_id_renumber_map_invariants( + raft::handle_t const& handle, + raft::device_span org_edgelist_edge_ids, + std::optional> org_edgelist_edge_types, + std::optional> org_edgelist_hops, + std::optional> org_edgelist_label_offsets, + raft::device_span renumber_map, + std::optional> renumber_map_label_type_offsets, + size_t num_labels, + size_t num_edge_types); diff --git a/cpp/tests/sampling/mg_negative_sampling.cpp b/cpp/tests/sampling/mg_negative_sampling.cpp new file mode 100644 index 00000000000..7c64bb7fbbb --- /dev/null +++ b/cpp/tests/sampling/mg_negative_sampling.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
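check_offsets is declared above without a body; by its name and parameters it presumably enforces the usual offsets-array contract. A hedged host-side restatement of that contract (the device implementation operates on spans and may differ in detail):

#include <cstddef>
#include <vector>

// Illustrative only: an offsets array over num_segments segments has num_segments + 1
// entries, starts at 0, ends at num_elements, and is non-decreasing.
bool offsets_are_valid(std::vector<std::size_t> const& offsets,
                       std::size_t num_segments,
                       std::size_t num_elements)
{
  if (offsets.size() != num_segments + 1) { return false; }
  if (offsets.front() != 0 || offsets.back() != num_elements) { return false; }
  for (std::size_t i = 1; i < offsets.size(); ++i) {
    if (offsets[i] < offsets[i - 1]) { return false; }
  }
  return true;
}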
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/validation_utilities.hpp" + +#include +#include + +#include + +struct Negative_Sampling_Usecase { + float sample_multiplier{2}; + bool use_src_bias{false}; + bool use_dst_bias{false}; + bool remove_duplicates{false}; + bool remove_existing_edges{false}; + bool exact_number_of_samples{false}; + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_MGNegative_Sampling : public ::testing::TestWithParam { + public: + using graph_t = cugraph::graph_t; + using graph_view_t = cugraph::graph_view_t; + + Tests_MGNegative_Sampling() : graph_(*handle_) {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + template + void load_graph(input_t const& param) + { + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + std::tie(graph_, edge_weights_, renumber_map_labels_) = + cugraph::test::construct_graph( + *handle_, param, true, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + edge_mask_ = + cugraph::test::generate::edge_property(*handle_, graph_.view(), 2); + } + + virtual void SetUp() {} + virtual void TearDown() {} + + void run_current_test(raft::random::RngState& rng_state, + Negative_Sampling_Usecase const& negative_sampling_usecase) + { + constexpr bool do_expensive_check{false}; + + HighResTimer hr_timer{}; + + auto graph_view = graph_.view(); + + if (negative_sampling_usecase.edge_masking) { graph_view.attach_edge_mask(edge_mask_->view()); } + + size_t num_samples = + graph_view.compute_number_of_edges(*handle_) * negative_sampling_usecase.sample_multiplier; + + rmm::device_uvector src_bias_v(0, handle_->get_stream()); + rmm::device_uvector dst_bias_v(0, handle_->get_stream()); + + std::optional> src_bias{std::nullopt}; + std::optional> dst_bias{std::nullopt}; + + if (negative_sampling_usecase.use_src_bias) { + src_bias_v.resize(graph_view.local_vertex_partition_range_size(), handle_->get_stream()); + + cugraph::detail::uniform_random_fill(handle_->get_stream(), + src_bias_v.data(), + src_bias_v.size(), + weight_t{1}, + weight_t{10}, + rng_state); + + src_bias = raft::device_span{src_bias_v.data(), src_bias_v.size()}; + } + + if (negative_sampling_usecase.use_dst_bias) { + dst_bias_v.resize(graph_view.local_vertex_partition_range_size(), handle_->get_stream()); + + cugraph::detail::uniform_random_fill(handle_->get_stream(), + dst_bias_v.data(), + dst_bias_v.size(), + weight_t{1}, + weight_t{10}, + rng_state); + + dst_bias = raft::device_span{dst_bias_v.data(), dst_bias_v.size()}; + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + 
hr_timer.start("Negative sampling"); + } + + auto&& [src_out, dst_out] = + cugraph::negative_sampling(*handle_, + rng_state, + graph_view, + src_bias, + dst_bias, + num_samples, + negative_sampling_usecase.remove_duplicates, + negative_sampling_usecase.remove_existing_edges, + negative_sampling_usecase.exact_number_of_samples, + do_expensive_check); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (negative_sampling_usecase.check_correctness) { + ASSERT_EQ(src_out.size(), dst_out.size()) << "Result size (src, dst) mismatch"; + + cugraph::test::sort(*handle_, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}); + + // TODO: Move this to validation_utilities... + auto h_vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); + rmm::device_uvector d_vertex_partition_range_lasts( + h_vertex_partition_range_lasts.size(), handle_->get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + h_vertex_partition_range_lasts.data(), + h_vertex_partition_range_lasts.size(), + handle_->get_stream()); + + size_t error_count = cugraph::test::count_edges_on_wrong_int_gpu( + *handle_, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}, + raft::device_span{d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()}); + + ASSERT_EQ(error_count, 0) << "Generated edges mapped to the wrong GPU"; + + if ((negative_sampling_usecase.remove_duplicates) && (src_out.size() > 0)) { + error_count = cugraph::test::count_duplicate_vertex_pairs_sorted( + *handle_, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}); + ASSERT_EQ(error_count, 0) << "Remove duplicates specified, found duplicate entries"; + } + + if (negative_sampling_usecase.remove_existing_edges) { + rmm::device_uvector graph_src(0, handle_->get_stream()); + rmm::device_uvector graph_dst(0, handle_->get_stream()); + + std::tie(graph_src, graph_dst, std::ignore, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, graph_view, std::nullopt, std::nullopt, std::nullopt, std::nullopt); + + error_count = cugraph::test::count_intersection( + *handle_, + raft::device_span{graph_src.data(), graph_src.size()}, + raft::device_span{graph_dst.data(), graph_dst.size()}, + std::nullopt, + std::nullopt, + std::nullopt, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}, + std::nullopt, + std::nullopt, + std::nullopt); + ASSERT_EQ(error_count, 0) << "Remove existing edges specified, found existing edges"; + } + + if (negative_sampling_usecase.exact_number_of_samples) { + size_t sz = cugraph::host_scalar_allreduce( + handle_->get_comms(), src_out.size(), raft::comms::op_t::SUM, handle_->get_stream()); + ASSERT_EQ(sz, num_samples) << "Expected exact number of samples"; + } + + // TBD: How do we determine if we have properly reflected the biases?
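cugraph::test::count_duplicate_vertex_pairs_sorted is used here as a black box; on sorted (src, dst) pairs a duplicate check reduces to one adjacent-comparison pass. A sketch of that idea (illustrative, not the utility's actual implementation; needs nvcc's --extended-lambda):

#include <thrust/count.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>

#include <cstddef>

// Illustrative only: on sorted pairs, duplicates are exactly the elements equal to their
// immediate predecessor. srcs and dsts are device pointers of length n.
template <typename vertex_t>
std::size_t count_sorted_pair_duplicates(vertex_t const* srcs, vertex_t const* dsts, std::size_t n)
{
  if (n < 2) { return 0; }
  return thrust::count_if(thrust::device,
                          thrust::make_counting_iterator(std::size_t{1}),
                          thrust::make_counting_iterator(n),
                          [srcs, dsts] __device__(std::size_t i) {
                            return (srcs[i] == srcs[i - 1]) && (dsts[i] == dsts[i - 1]);
                          });
}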
+ } + } + + public: + static std::unique_ptr handle_; + + private: + graph_t graph_; + std::optional> edge_weights_{std::nullopt}; + std::optional> edge_mask_{std::nullopt}; + std::optional> renumber_map_labels_{std::nullopt}; +}; + +template +std::unique_ptr + Tests_MGNegative_Sampling::handle_ = nullptr; + +using Tests_MGNegative_Sampling_File_i64_i64_float = + Tests_MGNegative_Sampling; + +using Tests_MGNegative_Sampling_Rmat_i64_i64_float = + Tests_MGNegative_Sampling; + +template +void run_all_tests(CurrentTest* current_test) +{ + raft::random::RngState rng_state{ + static_cast(current_test->handle_->get_comms().get_rank())}; + + for (bool use_src_bias : {false, true}) + for (bool use_dst_bias : {false, true}) + for (bool remove_duplicates : {false, true}) + for (bool remove_existing_edges : {false, true}) + for (bool exact_number_of_samples : {false, true}) + for (bool edge_masking : {false, true}) + current_test->run_current_test(rng_state, + Negative_Sampling_Usecase{2, + use_src_bias, + use_dst_bias, + remove_duplicates, + remove_existing_edges, + exact_number_of_samples, + edge_masking}); +} + +TEST_P(Tests_MGNegative_Sampling_File_i64_i64_float, CheckInt64Int64Float) +{ + load_graph(override_File_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +TEST_P(Tests_MGNegative_Sampling_Rmat_i64_i64_float, CheckInt64Int64Float) +{ + load_graph(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGNegative_Sampling_File_i64_i64_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + file_large_test, + Tests_MGNegative_Sampling_File_i64_i64_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGNegative_Sampling_Rmat_i64_i64_float, + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGNegative_Sampling_Rmat_i64_i64_float, + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/mg_random_walks_test.cpp b/cpp/tests/sampling/mg_random_walks_test.cpp index c2ad5c37e9e..e2415c08e60 100644 --- a/cpp/tests/sampling/mg_random_walks_test.cpp +++ b/cpp/tests/sampling/mg_random_walks_test.cpp @@ -44,8 +44,10 @@ struct UniformRandomWalks_Usecase { raft::device_span start_vertices, size_t max_depth) { + raft::random::RngState rng_state(static_cast(handle.get_comms().get_rank())); + return cugraph::uniform_random_walks( - handle, graph_view, edge_weight_view, start_vertices, max_depth, seed); + handle, rng_state, graph_view, edge_weight_view, start_vertices, max_depth); } bool expect_throw() { return false; } @@ -66,12 +68,13 @@ struct BiasedRandomWalks_Usecase { { CUGRAPH_EXPECTS(edge_weight_view.has_value(), "Biased random walk requires edge weights."); + 
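CUGRAPH_EXPECTS failures surface as cugraph::logic_error, which is what the expect_throw() flags in these use-case structs let the test body (outside this hunk) anticipate. A sketch of that dispatch, with run_walks() as a hypothetical stand-in for the actual invocation:

if (usecase.expect_throw()) {
  EXPECT_THROW(run_walks(), cugraph::logic_error);  // this configuration should fail loudly
} else {
  EXPECT_NO_THROW(run_walks());
}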
raft::random::RngState rng_state(static_cast(handle.get_comms().get_rank())); + return cugraph::biased_random_walks( - handle, graph_view, *edge_weight_view, start_vertices, max_depth, seed); + handle, rng_state, graph_view, *edge_weight_view, start_vertices, max_depth); } - // FIXME: Not currently implemented - bool expect_throw() { return true; } + bool expect_throw() { return !test_weighted; } }; struct Node2VecRandomWalks_Usecase { @@ -89,18 +92,19 @@ struct Node2VecRandomWalks_Usecase { raft::device_span start_vertices, size_t max_depth) { + raft::random::RngState rng_state(static_cast(handle.get_comms().get_rank())); + return cugraph::node2vec_random_walks(handle, + rng_state, graph_view, edge_weight_view, start_vertices, max_depth, static_cast(p), - static_cast(q), - seed); + static_cast(q)); } - // FIXME: Not currently implemented - bool expect_throw() { return true; } + bool expect_throw() { return false; } }; template diff --git a/cpp/tests/sampling/negative_sampling.cpp b/cpp/tests/sampling/negative_sampling.cpp new file mode 100644 index 00000000000..ba929c63e9b --- /dev/null +++ b/cpp/tests/sampling/negative_sampling.cpp @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/validation_utilities.hpp" + +#include +#include + +#include + +struct Negative_Sampling_Usecase { + float sample_multiplier{2}; + bool use_src_bias{false}; + bool use_dst_bias{false}; + bool remove_duplicates{false}; + bool remove_existing_edges{false}; + bool exact_number_of_samples{false}; + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_Negative_Sampling : public ::testing::TestWithParam { + public: + using graph_t = cugraph::graph_t; + using graph_view_t = cugraph::graph_view_t; + + Tests_Negative_Sampling() : graph_(raft::handle_t{}) {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + template + void load_graph(input_t const& param) + { + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + std::tie(graph_, edge_weights_, renumber_map_labels_) = + cugraph::test::construct_graph( + handle, param, true, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + edge_mask_ = + cugraph::test::generate::edge_property(handle, graph_.view(), 2); + } + + virtual void SetUp() {} + virtual void TearDown() {} + + void run_current_test(raft::random::RngState& rng_state, + Negative_Sampling_Usecase const& negative_sampling_usecase) + { + constexpr bool do_expensive_check{false}; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + auto graph_view = graph_.view(); + + if (negative_sampling_usecase.edge_masking) { graph_view.attach_edge_mask(edge_mask_->view()); } + + size_t num_samples = + graph_view.compute_number_of_edges(handle) * negative_sampling_usecase.sample_multiplier; + + rmm::device_uvector src_bias_v(0, handle.get_stream()); + rmm::device_uvector dst_bias_v(0, handle.get_stream()); + + std::optional> src_bias{std::nullopt}; + std::optional> dst_bias{std::nullopt}; + + if (negative_sampling_usecase.use_src_bias) { + src_bias_v.resize(graph_view.number_of_vertices(), handle.get_stream()); + + cugraph::detail::uniform_random_fill(handle.get_stream(), + src_bias_v.data(), + src_bias_v.size(), + weight_t{1}, + weight_t{10}, + rng_state); + + src_bias = raft::device_span{src_bias_v.data(), src_bias_v.size()}; + } + + if (negative_sampling_usecase.use_dst_bias) { + dst_bias_v.resize(graph_view.number_of_vertices(), handle.get_stream()); + + cugraph::detail::uniform_random_fill(handle.get_stream(), + dst_bias_v.data(), + dst_bias_v.size(), + weight_t{1}, + weight_t{10}, + rng_state); + + dst_bias = raft::device_span{dst_bias_v.data(), dst_bias_v.size()}; + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Negative sampling"); + } + + auto&& [src_out, dst_out] = + cugraph::negative_sampling(handle, + rng_state, + graph_view, + src_bias, + dst_bias, + num_samples, + negative_sampling_usecase.remove_duplicates, + negative_sampling_usecase.remove_existing_edges, + negative_sampling_usecase.exact_number_of_samples, + do_expensive_check); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + 
hr_timer.display_and_clear(std::cout); + } + + if (negative_sampling_usecase.check_correctness) { + ASSERT_EQ(src_out.size(), dst_out.size()) << "Result size (src, dst) mismatch"; + + cugraph::test::sort(handle, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}); + + size_t error_count = cugraph::test::count_invalid_vertices( + handle, + raft::device_span{src_out.data(), src_out.size()}, + graph_view.local_vertex_partition_view()); + ASSERT_EQ(error_count, 0) << "Source vertices out of range > 0"; + + error_count = cugraph::test::count_invalid_vertices( + handle, + raft::device_span{dst_out.data(), dst_out.size()}, + graph_view.local_vertex_partition_view()); + ASSERT_EQ(error_count, 0) << "Dest vertices out of range > 0"; + + if (negative_sampling_usecase.remove_duplicates) { + error_count = cugraph::test::count_duplicate_vertex_pairs_sorted( + handle, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}); + ASSERT_EQ(error_count, 0) << "Remove duplicates specified, found duplicate entries"; + } + + if (negative_sampling_usecase.remove_existing_edges) { + rmm::device_uvector graph_src(0, handle.get_stream()); + rmm::device_uvector graph_dst(0, handle.get_stream()); + + std::tie(graph_src, graph_dst, std::ignore, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + handle, graph_view, std::nullopt, std::nullopt, std::nullopt, std::nullopt); + + error_count = cugraph::test::count_intersection( + handle, + raft::device_span{graph_src.data(), graph_src.size()}, + raft::device_span{graph_dst.data(), graph_dst.size()}, + std::nullopt, + std::nullopt, + std::nullopt, + raft::device_span{src_out.data(), src_out.size()}, + raft::device_span{dst_out.data(), dst_out.size()}, + std::nullopt, + std::nullopt, + std::nullopt); + + ASSERT_EQ(error_count, 0) << "Remove existing edges specified, found existing edges"; + } + + if (negative_sampling_usecase.exact_number_of_samples) { + ASSERT_EQ(src_out.size(), num_samples) << "Expected exact number of samples"; + } + + // TBD: How do we determine if we have properly reflected the biases? 
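One plausible answer to the TBD above, valid only when duplicates are kept so draws are roughly independent: with per-vertex source bias b_v, each sample's source is a draw with probability b_v / Σb, so per-vertex counts should land within a few binomial standard deviations of expectation. A host-side sketch under that assumption (names and tolerance are illustrative, not part of this diff):

#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative only: treat each vertex's sample count as binomial(num_samples, p_v) with
// p_v = bias[v] / sum(bias) and flag counts far from expectation. counts.size() must equal
// bias.size().
bool biases_plausible(std::vector<std::size_t> const& counts,
                      std::vector<double> const& bias,
                      std::size_t num_samples,
                      double num_std_devs = 4.0)
{
  double bias_sum = 0.0;
  for (auto b : bias) { bias_sum += b; }
  for (std::size_t v = 0; v < bias.size(); ++v) {
    double p        = bias[v] / bias_sum;
    double expected = p * static_cast<double>(num_samples);
    double stddev   = std::sqrt(static_cast<double>(num_samples) * p * (1.0 - p));
    if (std::abs(static_cast<double>(counts[v]) - expected) > num_std_devs * stddev + 1.0) {
      return false;
    }
  }
  return true;
}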
+ } + } + + private: + graph_t graph_; + std::optional> edge_weights_{std::nullopt}; + std::optional> edge_mask_{std::nullopt}; + std::optional> renumber_map_labels_{std::nullopt}; +}; + +using Tests_Negative_Sampling_File_i32_i32_float = + Tests_Negative_Sampling; + +using Tests_Negative_Sampling_File_i32_i64_float = + Tests_Negative_Sampling; + +using Tests_Negative_Sampling_File_i64_i64_float = + Tests_Negative_Sampling; + +using Tests_Negative_Sampling_Rmat_i32_i32_float = + Tests_Negative_Sampling; + +using Tests_Negative_Sampling_Rmat_i32_i64_float = + Tests_Negative_Sampling; + +using Tests_Negative_Sampling_Rmat_i64_i64_float = + Tests_Negative_Sampling; + +template +void run_all_tests(CurrentTest* current_test) +{ + raft::random::RngState rng_state{0}; + + for (bool use_src_bias : {false, true}) + for (bool use_dst_bias : {false, true}) + for (bool remove_duplicates : {false, true}) + for (bool remove_existing_edges : {false, true}) + for (bool exact_number_of_samples : {false, true}) + for (bool edge_masking : {false, true}) + current_test->run_current_test(rng_state, + Negative_Sampling_Usecase{2, + use_src_bias, + use_dst_bias, + remove_duplicates, + remove_existing_edges, + exact_number_of_samples, + edge_masking}); +} + +TEST_P(Tests_Negative_Sampling_File_i32_i32_float, CheckInt32Int32Float) +{ + load_graph(override_File_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +TEST_P(Tests_Negative_Sampling_File_i32_i64_float, CheckInt32Int64Float) +{ + load_graph(override_File_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +TEST_P(Tests_Negative_Sampling_File_i64_i64_float, CheckInt64Int64Float) +{ + load_graph(override_File_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +TEST_P(Tests_Negative_Sampling_Rmat_i32_i32_float, CheckInt32Int32Float) +{ + load_graph(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +TEST_P(Tests_Negative_Sampling_Rmat_i32_i64_float, CheckInt32Int64Float) +{ + load_graph(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +TEST_P(Tests_Negative_Sampling_Rmat_i64_i64_float, CheckInt64Int64Float) +{ + load_graph(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); + run_all_tests(this); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Negative_Sampling_File_i32_i32_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + file_large_test, + Tests_Negative_Sampling_File_i32_i32_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Negative_Sampling_File_i32_i64_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + file_large_test, + Tests_Negative_Sampling_File_i32_i64_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Negative_Sampling_File_i64_i64_float, + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + file_large_test, + Tests_Negative_Sampling_File_i64_i64_float, + 
::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Negative_Sampling_Rmat_i32_i32_float, + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Negative_Sampling_Rmat_i32_i64_float, + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Negative_Sampling_Rmat_i64_i64_float, + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Negative_Sampling_Rmat_i64_i64_float, + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/random_walks_check.cuh b/cpp/tests/sampling/random_walks_check.cuh index 0fd73b5bba7..380b97a5b84 100644 --- a/cpp/tests/sampling/random_walks_check.cuh +++ b/cpp/tests/sampling/random_walks_check.cuh @@ -108,7 +108,7 @@ void random_walks_validate( (int)d, (float)w); } else { - printf("edge (%d,%d) NOT FOUND\n", (int)s, (int)d); + printf("edge (%d,%d), weight %g NOT FOUND\n", (int)s, (int)d, (float)w); } return 1; diff --git a/cpp/tests/sampling/sampling_heterogeneous_post_processing_test.cpp b/cpp/tests/sampling/sampling_heterogeneous_post_processing_test.cpp new file mode 100644 index 00000000000..2b2049dc8db --- /dev/null +++ b/cpp/tests/sampling/sampling_heterogeneous_post_processing_test.cpp @@ -0,0 +1,828 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "detail/sampling_post_processing_validate.hpp" +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" + +#include +#include +#include + +#include + +#include + +#include + +struct SamplingHeterogeneousPostProcessing_Usecase { + size_t num_labels{}; + size_t num_seeds_per_label{}; + size_t num_vertex_types{}; + std::vector<int32_t> fanouts{{-1}}; + bool sample_with_replacement{false}; + + bool src_is_major{true}; + bool renumber_with_seeds{false}; + bool check_correctness{true}; +}; + +template <typename input_usecase_t> +class Tests_SamplingHeterogeneousPostProcessing + : public ::testing::TestWithParam< + std::tuple<SamplingHeterogeneousPostProcessing_Usecase, input_usecase_t>> { + public: + Tests_SamplingHeterogeneousPostProcessing() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template <typename vertex_t, typename edge_t> + void run_current_test(std::tuple<SamplingHeterogeneousPostProcessing_Usecase, input_usecase_t> const& param) + { + using label_t = int32_t; + using weight_t = float; + using edge_id_t = edge_t; + using edge_type_t = int32_t; + + bool constexpr store_transposed = false; + bool constexpr renumber = true; + bool constexpr test_weighted = true; + + auto [sampling_heterogeneous_post_processing_usecase, input_usecase] = param; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + // 1. create a graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph<vertex_t, edge_t, weight_t, store_transposed, false>( + handle, input_usecase, test_weighted, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + // 2. vertex type offsets + + raft::random::RngState rng_state(0); + + rmm::device_uvector<vertex_t> vertex_type_offsets( + sampling_heterogeneous_post_processing_usecase.num_vertex_types + 1, handle.get_stream()); + { + auto num_vertices = graph_view.number_of_vertices(); + vertex_type_offsets.set_element_to_zero_async(0, handle.get_stream()); + vertex_type_offsets.set_element_async( + vertex_type_offsets.size() - 1, num_vertices, handle.get_stream()); + auto tmp = cugraph::select_random_vertices( + handle, + graph_view, + std::nullopt, + rng_state, + sampling_heterogeneous_post_processing_usecase.num_vertex_types - 1, + false /* with_replacement */, + true /* sort_vertices */); + raft::copy(vertex_type_offsets.data() + 1, tmp.data(), tmp.size(), handle.get_stream()); + } + + // 3. seed vertices (& labels) + + rmm::device_uvector<vertex_t> starting_vertices( + sampling_heterogeneous_post_processing_usecase.num_labels * + sampling_heterogeneous_post_processing_usecase.num_seeds_per_label, + handle.get_stream()); + cugraph::detail::uniform_random_fill(handle.get_stream(), + starting_vertices.data(), + starting_vertices.size(), + vertex_t{0}, + graph_view.number_of_vertices(), + rng_state); + auto starting_vertex_labels = (sampling_heterogeneous_post_processing_usecase.num_labels > 1) + ? std::make_optional<rmm::device_uvector<label_t>>( + starting_vertices.size(), handle.get_stream()) + : std::nullopt; + auto starting_vertex_label_offsets = + (sampling_heterogeneous_post_processing_usecase.num_labels > 1) + ? 
std::make_optional<rmm::device_uvector<size_t>>( + sampling_heterogeneous_post_processing_usecase.num_labels + 1, handle.get_stream()) + : std::nullopt; + if (starting_vertex_labels) { + auto num_seeds_per_label = sampling_heterogeneous_post_processing_usecase.num_seeds_per_label; + for (size_t i = 0; i < sampling_heterogeneous_post_processing_usecase.num_labels; ++i) { + cugraph::detail::scalar_fill(handle.get_stream(), + (*starting_vertex_labels).data() + i * num_seeds_per_label, + num_seeds_per_label, + static_cast<label_t>(i)); + } + cugraph::detail::stride_fill(handle.get_stream(), + (*starting_vertex_label_offsets).data(), + (*starting_vertex_label_offsets).size(), + size_t{0}, + num_seeds_per_label); + } + + // 4. generate edge IDs and types + + auto num_edge_types = + sampling_heterogeneous_post_processing_usecase.num_vertex_types * + sampling_heterogeneous_post_processing_usecase + .num_vertex_types; // necessary to enforce that edge type dictates edge source vertex type + // and edge destination vertex type. + + std::optional<cugraph::edge_property_t<decltype(graph_view), edge_type_t>> edge_types{ + std::nullopt}; + if (num_edge_types > 1) { + edge_types = + cugraph::test::generate<decltype(graph_view), edge_type_t>::edge_property_by_src_dst_types( + handle, + graph_view, + raft::device_span<vertex_t const>(vertex_type_offsets.data(), vertex_type_offsets.size()), + num_edge_types); + } + + cugraph::edge_property_t<decltype(graph_view), edge_id_t> edge_ids(handle); + if (edge_types) { + static_assert(std::is_same_v<edge_id_t, edge_t>); + edge_ids = + cugraph::test::generate<decltype(graph_view), edge_id_t>::unique_edge_property_per_type( + handle, graph_view, (*edge_types).view(), static_cast<edge_type_t>(num_edge_types)); + } else { + edge_ids = cugraph::test::generate<decltype(graph_view), edge_id_t>::unique_edge_property( + handle, graph_view); + } + + // 5. sampling + + rmm::device_uvector<vertex_t> org_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector<vertex_t> org_edgelist_dsts(0, handle.get_stream()); + std::optional<rmm::device_uvector<weight_t>> org_edgelist_weights{std::nullopt}; + std::optional<rmm::device_uvector<edge_id_t>> org_edgelist_edge_ids{std::nullopt}; + std::optional<rmm::device_uvector<edge_type_t>> org_edgelist_edge_types{std::nullopt}; + std::optional<rmm::device_uvector<int32_t>> org_edgelist_hops{std::nullopt}; + std::optional<rmm::device_uvector<label_t>> org_labels{std::nullopt}; + std::optional<rmm::device_uvector<size_t>> org_edgelist_label_offsets{std::nullopt}; + std::tie(org_edgelist_srcs, + org_edgelist_dsts, + org_edgelist_weights, + org_edgelist_edge_ids, + org_edgelist_edge_types, + org_edgelist_hops, + org_labels, + org_edgelist_label_offsets) = cugraph::uniform_neighbor_sample( + handle, + graph_view, + edge_weight_view, + std::optional<cugraph::edge_property_view_t<edge_t, edge_id_t const*>>{edge_ids.view()}, + edge_types + ? std::optional<cugraph::edge_property_view_t<edge_t, edge_type_t const*>>{(*edge_types) + .view()} + : std::nullopt, + raft::device_span<vertex_t const>(starting_vertices.data(), starting_vertices.size()), + starting_vertex_labels ? std::make_optional<raft::device_span<label_t const>>( + (*starting_vertex_labels).data(), (*starting_vertex_labels).size()) + : std::nullopt, + std::nullopt, + raft::host_span<int32_t const>(sampling_heterogeneous_post_processing_usecase.fanouts.data(), + sampling_heterogeneous_post_processing_usecase.fanouts.size()), + rng_state, + sampling_heterogeneous_post_processing_usecase.fanouts.size() > 1, + sampling_heterogeneous_post_processing_usecase.sample_with_replacement, + cugraph::prior_sources_behavior_t::EXCLUDE, + false); + + if (!sampling_heterogeneous_post_processing_usecase.src_is_major) { + std::swap(org_edgelist_srcs, org_edgelist_dsts); + } + + // 6. post processing: renumber & sort + + { + rmm::device_uvector<vertex_t> renumbered_and_sorted_edgelist_srcs(org_edgelist_srcs.size(), + handle.get_stream()); + rmm::device_uvector<vertex_t> renumbered_and_sorted_edgelist_dsts(org_edgelist_dsts.size(), + handle.get_stream()); + auto renumbered_and_sorted_edgelist_weights = + org_edgelist_weights ? 
std::make_optional<rmm::device_uvector<weight_t>>( + (*org_edgelist_weights).size(), handle.get_stream()) + : std::nullopt; + auto renumbered_and_sorted_edgelist_edge_ids = + org_edgelist_edge_ids ? std::make_optional<rmm::device_uvector<edge_id_t>>( + (*org_edgelist_edge_ids).size(), handle.get_stream()) + : std::nullopt; + auto renumbered_and_sorted_edgelist_edge_types = + org_edgelist_edge_types ? std::make_optional<rmm::device_uvector<edge_type_t>>( + (*org_edgelist_edge_types).size(), handle.get_stream()) + : std::nullopt; + auto renumbered_and_sorted_edgelist_hops = + org_edgelist_hops ? std::make_optional(rmm::device_uvector<int32_t>( + (*org_edgelist_hops).size(), handle.get_stream())) + : std::nullopt; + + raft::copy(renumbered_and_sorted_edgelist_srcs.data(), + org_edgelist_srcs.data(), + org_edgelist_srcs.size(), + handle.get_stream()); + raft::copy(renumbered_and_sorted_edgelist_dsts.data(), + org_edgelist_dsts.data(), + org_edgelist_dsts.size(), + handle.get_stream()); + if (renumbered_and_sorted_edgelist_weights) { + raft::copy((*renumbered_and_sorted_edgelist_weights).data(), + (*org_edgelist_weights).data(), + (*org_edgelist_weights).size(), + handle.get_stream()); + } + if (renumbered_and_sorted_edgelist_edge_ids) { + raft::copy((*renumbered_and_sorted_edgelist_edge_ids).data(), + (*org_edgelist_edge_ids).data(), + (*org_edgelist_edge_ids).size(), + handle.get_stream()); + } + if (renumbered_and_sorted_edgelist_edge_types) { + raft::copy((*renumbered_and_sorted_edgelist_edge_types).data(), + (*org_edgelist_edge_types).data(), + (*org_edgelist_edge_types).size(), + handle.get_stream()); + } + if (renumbered_and_sorted_edgelist_hops) { + raft::copy((*renumbered_and_sorted_edgelist_hops).data(), + (*org_edgelist_hops).data(), + (*org_edgelist_hops).size(), + handle.get_stream()); + } + + std::optional<rmm::device_uvector<size_t>> + renumbered_and_sorted_edgelist_label_type_hop_offsets{std::nullopt}; + rmm::device_uvector<vertex_t> renumbered_and_sorted_vertex_renumber_map(0, + handle.get_stream()); + rmm::device_uvector<size_t> renumbered_and_sorted_vertex_renumber_map_label_type_offsets( + 0, handle.get_stream()); + std::optional<rmm::device_uvector<edge_id_t>> renumbered_and_sorted_edge_id_renumber_map{ + std::nullopt}; + std::optional<rmm::device_uvector<size_t>> + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets{std::nullopt}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Renumber and sort sampled edgelist"); + } + + std::tie(renumbered_and_sorted_edgelist_srcs, + renumbered_and_sorted_edgelist_dsts, + renumbered_and_sorted_edgelist_weights, + renumbered_and_sorted_edgelist_edge_ids, + renumbered_and_sorted_edgelist_label_type_hop_offsets, + renumbered_and_sorted_vertex_renumber_map, + renumbered_and_sorted_vertex_renumber_map_label_type_offsets, + renumbered_and_sorted_edge_id_renumber_map, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = + cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( + handle, + std::move(renumbered_and_sorted_edgelist_srcs), + std::move(renumbered_and_sorted_edgelist_dsts), + std::move(renumbered_and_sorted_edgelist_weights), + std::move(renumbered_and_sorted_edgelist_edge_ids), + std::move(renumbered_and_sorted_edgelist_edge_types), + std::move(renumbered_and_sorted_edgelist_hops), + sampling_heterogeneous_post_processing_usecase.renumber_with_seeds + ? std::make_optional<raft::device_span<vertex_t const>>(starting_vertices.data(), + starting_vertices.size()) + : std::nullopt, + (sampling_heterogeneous_post_processing_usecase.renumber_with_seeds && + starting_vertex_label_offsets) + ? 
std::make_optional<raft::device_span<size_t const>>( + (*starting_vertex_label_offsets).data(), (*starting_vertex_label_offsets).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional(raft::device_span<size_t const>( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size())) + : std::nullopt, + raft::device_span<vertex_t const>(vertex_type_offsets.data(), vertex_type_offsets.size()), + sampling_heterogeneous_post_processing_usecase.num_labels, + sampling_heterogeneous_post_processing_usecase.fanouts.size(), + sampling_heterogeneous_post_processing_usecase.num_vertex_types, + num_edge_types, + sampling_heterogeneous_post_processing_usecase.src_is_major); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (sampling_heterogeneous_post_processing_usecase.check_correctness) { + if (renumbered_and_sorted_edgelist_label_type_hop_offsets) { + ASSERT_TRUE(check_offsets( + handle, + raft::device_span<size_t const>( + (*renumbered_and_sorted_edgelist_label_type_hop_offsets).data(), + (*renumbered_and_sorted_edgelist_label_type_hop_offsets).size()), + sampling_heterogeneous_post_processing_usecase.num_labels * num_edge_types * + sampling_heterogeneous_post_processing_usecase.fanouts.size(), + renumbered_and_sorted_edgelist_srcs.size())) + << "Renumbered and sorted edge (label, edge type, hop) offset array is invalid."; + } + + ASSERT_TRUE( + check_offsets(handle, + raft::device_span<size_t const>( + renumbered_and_sorted_vertex_renumber_map_label_type_offsets.data(), + renumbered_and_sorted_vertex_renumber_map_label_type_offsets.size()), + sampling_heterogeneous_post_processing_usecase.num_labels * + sampling_heterogeneous_post_processing_usecase.num_vertex_types, + renumbered_and_sorted_vertex_renumber_map.size())) + << "Renumbered and sorted vertex renumber map (label, vertex type) offset array is " + "invalid."; + + if (renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) { + ASSERT_TRUE(check_offsets( + handle, + raft::device_span<size_t const>( + (*renumbered_and_sorted_edge_id_renumber_map_label_type_offsets).data(), + (*renumbered_and_sorted_edge_id_renumber_map_label_type_offsets).size()), + sampling_heterogeneous_post_processing_usecase.num_labels * num_edge_types, + (*renumbered_and_sorted_edge_id_renumber_map).size())) + << "Renumbered and sorted edge renumber map (label, edge type) offset array is " + "invalid."; + } + + // check whether the edges are properly sorted + + auto renumbered_and_sorted_edgelist_majors = + sampling_heterogeneous_post_processing_usecase.src_is_major + ? raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_srcs.data(), + renumbered_and_sorted_edgelist_srcs.size()) + : raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_dsts.data(), + renumbered_and_sorted_edgelist_dsts.size()); + auto renumbered_and_sorted_edgelist_minors = + sampling_heterogeneous_post_processing_usecase.src_is_major + ? 
raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_dsts.data(), + renumbered_and_sorted_edgelist_dsts.size()) + : raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_srcs.data(), + renumbered_and_sorted_edgelist_srcs.size()); + + if (renumbered_and_sorted_edgelist_label_type_hop_offsets) { + for (size_t i = 0; + i < sampling_heterogeneous_post_processing_usecase.num_labels * num_edge_types * + sampling_heterogeneous_post_processing_usecase.fanouts.size(); + ++i) { + auto hop_start_offset = (*renumbered_and_sorted_edgelist_label_type_hop_offsets) + .element(i, handle.get_stream()); + auto hop_end_offset = (*renumbered_and_sorted_edgelist_label_type_hop_offsets) + .element(i + 1, handle.get_stream()); + ASSERT_TRUE(check_edgelist_is_sorted( + handle, + raft::device_span<vertex_t const>( + renumbered_and_sorted_edgelist_majors.data() + hop_start_offset, + hop_end_offset - hop_start_offset), + raft::device_span<vertex_t const>( + renumbered_and_sorted_edgelist_minors.data() + hop_start_offset, + hop_end_offset - hop_start_offset))) + << "Renumbered and sorted edge list is not properly sorted."; + } + } else { + ASSERT_TRUE(check_edgelist_is_sorted( + handle, + raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_majors.data(), + renumbered_and_sorted_edgelist_majors.size()), + raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_minors.data(), + renumbered_and_sorted_edgelist_minors.size()))) + << "Renumbered and sorted edge list is not properly sorted."; + } + + // check whether renumbering recovers the original edge list + + ASSERT_TRUE(compare_heterogeneous_edgelist( + handle, + raft::device_span<vertex_t const>(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span<vertex_t const>(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_weights ? std::make_optional<raft::device_span<weight_t const>>( + (*org_edgelist_weights).data(), (*org_edgelist_weights).size()) + : std::nullopt, + org_edgelist_edge_ids + ? std::make_optional<raft::device_span<edge_id_t const>>( + (*org_edgelist_edge_ids).data(), (*org_edgelist_edge_ids).size()) + : std::nullopt, + org_edgelist_edge_types + ? std::make_optional<raft::device_span<edge_type_t const>>( + (*org_edgelist_edge_types).data(), (*org_edgelist_edge_types).size()) + : std::nullopt, + org_edgelist_hops ? std::make_optional<raft::device_span<int32_t const>>( + (*org_edgelist_hops).data(), (*org_edgelist_hops).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional<raft::device_span<size_t const>>( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size()) + : std::nullopt, + raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_srcs.data(), + renumbered_and_sorted_edgelist_srcs.size()), + raft::device_span<vertex_t const>(renumbered_and_sorted_edgelist_dsts.data(), + renumbered_and_sorted_edgelist_dsts.size()), + renumbered_and_sorted_edgelist_weights + ? std::make_optional<raft::device_span<weight_t const>>( + (*renumbered_and_sorted_edgelist_weights).data(), + (*renumbered_and_sorted_edgelist_weights).size()) + : std::nullopt, + renumbered_and_sorted_edgelist_edge_ids + ? std::make_optional<raft::device_span<edge_id_t const>>( + (*renumbered_and_sorted_edgelist_edge_ids).data(), + (*renumbered_and_sorted_edgelist_edge_ids).size()) + : std::nullopt, + renumbered_and_sorted_edgelist_label_type_hop_offsets + ? std::make_optional<raft::device_span<size_t const>>( + (*renumbered_and_sorted_edgelist_label_type_hop_offsets).data(), + (*renumbered_and_sorted_edgelist_label_type_hop_offsets).size()) + : std::nullopt, + raft::device_span<vertex_t const>(renumbered_and_sorted_vertex_renumber_map.data(), + renumbered_and_sorted_vertex_renumber_map.size()), + raft::device_span<size_t const>( + renumbered_and_sorted_vertex_renumber_map_label_type_offsets.data(), + renumbered_and_sorted_vertex_renumber_map_label_type_offsets.size()), + renumbered_and_sorted_edge_id_renumber_map + ? 
std::make_optional<raft::device_span<edge_id_t const>>( + (*renumbered_and_sorted_edge_id_renumber_map).data(), + (*renumbered_and_sorted_edge_id_renumber_map).size()) + : std::nullopt, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets + ? std::make_optional<raft::device_span<size_t const>>( + (*renumbered_and_sorted_edge_id_renumber_map_label_type_offsets).data(), + (*renumbered_and_sorted_edge_id_renumber_map_label_type_offsets).size()) + : std::nullopt, + raft::device_span<vertex_t const>(vertex_type_offsets.data(), vertex_type_offsets.size()), + sampling_heterogeneous_post_processing_usecase.num_labels, + sampling_heterogeneous_post_processing_usecase.num_vertex_types, + num_edge_types, + sampling_heterogeneous_post_processing_usecase.fanouts.size())) + << "Unrenumbering the renumbered and sorted edge list does not recover the original " + "edgelist."; + + // Check the invariants in vertex renumber_map + + ASSERT_TRUE(check_vertex_renumber_map_invariants( + handle, + sampling_heterogeneous_post_processing_usecase.renumber_with_seeds + ? std::make_optional<raft::device_span<vertex_t const>>(starting_vertices.data(), + starting_vertices.size()) + : std::nullopt, + (sampling_heterogeneous_post_processing_usecase.renumber_with_seeds && + starting_vertex_label_offsets) + ? std::make_optional<raft::device_span<size_t const>>( + (*starting_vertex_label_offsets).data(), (*starting_vertex_label_offsets).size()) + : std::nullopt, + raft::device_span<vertex_t const>(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span<vertex_t const>(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_hops ? std::make_optional<raft::device_span<int32_t const>>( + (*org_edgelist_hops).data(), (*org_edgelist_hops).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional<raft::device_span<size_t const>>( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size()) + : std::nullopt, + raft::device_span<vertex_t const>(renumbered_and_sorted_vertex_renumber_map.data(), + renumbered_and_sorted_vertex_renumber_map.size()), + std::make_optional<raft::device_span<size_t const>>( + renumbered_and_sorted_vertex_renumber_map_label_type_offsets.data(), + renumbered_and_sorted_vertex_renumber_map_label_type_offsets.size()), + raft::device_span<vertex_t const>(vertex_type_offsets.data(), vertex_type_offsets.size()), + sampling_heterogeneous_post_processing_usecase.num_labels, + sampling_heterogeneous_post_processing_usecase.num_vertex_types, + sampling_heterogeneous_post_processing_usecase.src_is_major)) + << "Renumbered and sorted output vertex renumber map violates invariants."; + + // Check the invariants in edge renumber_map + + if (org_edgelist_edge_ids) { + ASSERT_TRUE(check_edge_id_renumber_map_invariants( + handle, + raft::device_span<edge_id_t const>((*org_edgelist_edge_ids).data(), + (*org_edgelist_edge_ids).size()), + org_edgelist_edge_types + ? std::make_optional<raft::device_span<edge_type_t const>>( + (*org_edgelist_edge_types).data(), (*org_edgelist_edge_types).size()) + : std::nullopt, + org_edgelist_hops ? std::make_optional<raft::device_span<int32_t const>>( + (*org_edgelist_hops).data(), (*org_edgelist_hops).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional<raft::device_span<size_t const>>( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size()) + : std::nullopt, + raft::device_span<edge_id_t const>( + (*renumbered_and_sorted_edge_id_renumber_map).data(), + (*renumbered_and_sorted_edge_id_renumber_map).size()), + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets + ? 
std::make_optional<raft::device_span<size_t const>>( + (*renumbered_and_sorted_edge_id_renumber_map_label_type_offsets).data(), + (*renumbered_and_sorted_edge_id_renumber_map_label_type_offsets).size()) + : std::nullopt, + sampling_heterogeneous_post_processing_usecase.num_labels, + num_edge_types)) + << "Renumbered and sorted output edge ID renumber map violates invariants."; + } + } + } + } +}; + +using Tests_SamplingHeterogeneousPostProcessing_File = + Tests_SamplingHeterogeneousPostProcessing<cugraph::test::File_Usecase>; +using Tests_SamplingHeterogeneousPostProcessing_Rmat = + Tests_SamplingHeterogeneousPostProcessing<cugraph::test::Rmat_Usecase>; + +TEST_P(Tests_SamplingHeterogeneousPostProcessing_File, CheckInt32Int32) +{ + run_current_test<int32_t, int32_t>(override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SamplingHeterogeneousPostProcessing_Rmat, CheckInt32Int32) +{ + run_current_test<int32_t, int32_t>(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SamplingHeterogeneousPostProcessing_Rmat, CheckInt32Int64) +{ + run_current_test<int32_t, int64_t>(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SamplingHeterogeneousPostProcessing_Rmat, CheckInt64Int64) +{ + run_current_test<int64_t, int64_t>(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SamplingHeterogeneousPostProcessing_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values( + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 15}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, false, false}, + 
SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + 
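+// Note: the brace-initialized values above map, in order, to the
+// SamplingHeterogeneousPostProcessing_Usecase fields num_labels, num_seeds_per_label,
+// num_vertex_types, fanouts, sample_with_replacement, src_is_major, and renumber_with_seeds;
+// check_correctness keeps its default of true unless an eighth value is supplied (as in
+// rmat_benchmark_test below, which passes false).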
+INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_SamplingHeterogeneousPostProcessing_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values( + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 1, {5, 10, 25}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{1, 16, 4, {5, 10, 25}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, false, true, true}, + 
SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {10}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, false, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, false, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 1, {5, 10, 25}, true, true, true}, + SamplingHeterogeneousPostProcessing_Usecase{32, 16, 4, {5, 10, 25}, true, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, + Tests_SamplingHeterogeneousPostProcessing_Rmat, + ::testing::Combine( + // disable correctness checks (the eighth Usecase field) for benchmarking + ::testing::Values( + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, true, false, true, false}, + 
SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {10}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {10}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 1, 64, 1, {5, 10, 15}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 1, 64, 16, {5, 10, 15}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 1, 64, 16, {5, 10, 15}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 1, 64, 16, {5, 10, 15}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {5, 10, 15}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 1, 64, 16, {5, 10, 15}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {5, 10, 15}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {5, 10, 15}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 1, {5, 10, 15}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{1, 64, 16, {5, 10, 15}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {10}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 16, {10}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 
16, {5, 10, 15}, false, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, false, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, false, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, false, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, true, false, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, true, false, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 1, {5, 10, 15}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, true, true, false, false}, + SamplingHeterogeneousPostProcessing_Usecase{128, 64, 1, {5, 10, 15}, true, true, true, false}, + SamplingHeterogeneousPostProcessing_Usecase{ + 128, 64, 16, {5, 10, 15}, true, true, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cpp similarity index 52% rename from cpp/tests/sampling/sampling_post_processing_test.cu rename to cpp/tests/sampling/sampling_post_processing_test.cpp index ecec1d0ed89..b262794d26d 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cpp @@ -14,30 +14,17 @@ * limitations under the License. */ +#include "detail/sampling_post_processing_validate.hpp" #include "utilities/base_fixture.hpp" -#include -#include #include #include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include struct SamplingPostProcessing_Usecase { @@ -53,385 +40,6 @@ struct SamplingPostProcessing_Usecase { bool check_correctness{true}; }; -template <typename vertex_t, typename weight_t> -bool compare_edgelist(raft::handle_t const& handle, - raft::device_span<vertex_t const> org_edgelist_srcs, - raft::device_span<vertex_t const> org_edgelist_dsts, - std::optional<raft::device_span<weight_t const>> org_edgelist_weights, - raft::device_span<vertex_t const> renumbered_edgelist_srcs, - raft::device_span<vertex_t const> renumbered_edgelist_dsts, - std::optional<raft::device_span<weight_t const>> renumbered_edgelist_weights, - std::optional<raft::device_span<vertex_t const>> renumber_map) -{ - if (org_edgelist_srcs.size() != renumbered_edgelist_srcs.size()) { return false; } - - rmm::device_uvector<vertex_t> sorted_org_edgelist_srcs(org_edgelist_srcs.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - org_edgelist_srcs.begin(), - org_edgelist_srcs.end(), - sorted_org_edgelist_srcs.begin()); - rmm::device_uvector<vertex_t> sorted_org_edgelist_dsts(org_edgelist_dsts.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - org_edgelist_dsts.begin(), - org_edgelist_dsts.end(), - sorted_org_edgelist_dsts.begin()); - auto sorted_org_edgelist_weights = org_edgelist_weights - ? 
std::make_optional<rmm::device_uvector<weight_t>>( - (*org_edgelist_weights).size(), handle.get_stream()) - : std::nullopt; - if (sorted_org_edgelist_weights) { - thrust::copy(handle.get_thrust_policy(), - (*org_edgelist_weights).begin(), - (*org_edgelist_weights).end(), - (*sorted_org_edgelist_weights).begin()); - } - - if (sorted_org_edgelist_weights) { - auto sorted_org_edge_first = thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), - sorted_org_edgelist_dsts.begin(), - (*sorted_org_edgelist_weights).begin()); - thrust::sort(handle.get_thrust_policy(), - sorted_org_edge_first, - sorted_org_edge_first + sorted_org_edgelist_srcs.size()); - } else { - auto sorted_org_edge_first = - thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), sorted_org_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - sorted_org_edge_first, - sorted_org_edge_first + sorted_org_edgelist_srcs.size()); - } - - rmm::device_uvector<vertex_t> sorted_unrenumbered_edgelist_srcs(renumbered_edgelist_srcs.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - renumbered_edgelist_srcs.begin(), - renumbered_edgelist_srcs.end(), - sorted_unrenumbered_edgelist_srcs.begin()); - rmm::device_uvector<vertex_t> sorted_unrenumbered_edgelist_dsts(renumbered_edgelist_dsts.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - renumbered_edgelist_dsts.begin(), - renumbered_edgelist_dsts.end(), - sorted_unrenumbered_edgelist_dsts.begin()); - auto sorted_unrenumbered_edgelist_weights = - renumbered_edgelist_weights ? std::make_optional<rmm::device_uvector<weight_t>>( - (*renumbered_edgelist_weights).size(), handle.get_stream()) - : std::nullopt; - if (sorted_unrenumbered_edgelist_weights) { - thrust::copy(handle.get_thrust_policy(), - (*renumbered_edgelist_weights).begin(), - (*renumbered_edgelist_weights).end(), - (*sorted_unrenumbered_edgelist_weights).begin()); - } - - if (renumber_map) { - cugraph::unrenumber_int_vertices<vertex_t, false>( - handle, - sorted_unrenumbered_edgelist_srcs.data(), - sorted_unrenumbered_edgelist_srcs.size(), - (*renumber_map).data(), - std::vector<vertex_t>{static_cast<vertex_t>((*renumber_map).size())}); - cugraph::unrenumber_int_vertices<vertex_t, false>( - handle, - sorted_unrenumbered_edgelist_dsts.data(), - sorted_unrenumbered_edgelist_dsts.size(), - (*renumber_map).data(), - std::vector<vertex_t>{static_cast<vertex_t>((*renumber_map).size())}); - } - - if (sorted_unrenumbered_edgelist_weights) { - auto sorted_unrenumbered_edge_first = - thrust::make_zip_iterator(sorted_unrenumbered_edgelist_srcs.begin(), - sorted_unrenumbered_edgelist_dsts.begin(), - (*sorted_unrenumbered_edgelist_weights).begin()); - thrust::sort(handle.get_thrust_policy(), - sorted_unrenumbered_edge_first, - sorted_unrenumbered_edge_first + sorted_unrenumbered_edgelist_srcs.size()); - - auto sorted_org_edge_first = thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), - sorted_org_edgelist_dsts.begin(), - (*sorted_org_edgelist_weights).begin()); - return thrust::equal(handle.get_thrust_policy(), - sorted_org_edge_first, - sorted_org_edge_first + sorted_org_edgelist_srcs.size(), - sorted_unrenumbered_edge_first); - } else { - auto sorted_unrenumbered_edge_first = thrust::make_zip_iterator( - sorted_unrenumbered_edgelist_srcs.begin(), sorted_unrenumbered_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - sorted_unrenumbered_edge_first, - sorted_unrenumbered_edge_first + sorted_unrenumbered_edgelist_srcs.size()); - - auto sorted_org_edge_first = - thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), sorted_org_edgelist_dsts.begin()); - return 
thrust::equal(handle.get_thrust_policy(), - sorted_org_edge_first, - sorted_org_edge_first + sorted_org_edgelist_srcs.size(), - sorted_unrenumbered_edge_first); - } -} - -template <typename vertex_t> -bool check_renumber_map_invariants( - raft::handle_t const& handle, - std::optional<raft::device_span<vertex_t const>> starting_vertices, - raft::device_span<vertex_t const> org_edgelist_srcs, - raft::device_span<vertex_t const> org_edgelist_dsts, - std::optional<raft::device_span<int32_t const>> org_edgelist_hops, - raft::device_span<vertex_t const> renumber_map, - bool src_is_major) -{ - // Check the invariants in renumber_map - // Say we found the minimum (primary key:hop, secondary key:flag) pairs for every unique vertices, - // where flag is 0 for sources and 1 for destinations. Then, vertices with smaller (hop, flag) - // pairs should be renumbered to smaller numbers than vertices with larger (hop, flag) pairs. - auto org_edgelist_majors = src_is_major ? org_edgelist_srcs : org_edgelist_dsts; - auto org_edgelist_minors = src_is_major ? org_edgelist_dsts : org_edgelist_srcs; - - rmm::device_uvector<vertex_t> unique_majors(org_edgelist_majors.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - org_edgelist_majors.begin(), - org_edgelist_majors.end(), - unique_majors.begin()); - if (starting_vertices) { - auto old_size = unique_majors.size(); - unique_majors.resize(old_size + (*starting_vertices).size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - (*starting_vertices).begin(), - (*starting_vertices).end(), - unique_majors.begin() + old_size); - } - - std::optional<rmm::device_uvector<int32_t>> unique_major_hops = - org_edgelist_hops ? std::make_optional<rmm::device_uvector<int32_t>>( - (*org_edgelist_hops).size(), handle.get_stream()) - : std::nullopt; - if (org_edgelist_hops) { - thrust::copy(handle.get_thrust_policy(), - (*org_edgelist_hops).begin(), - (*org_edgelist_hops).end(), - (*unique_major_hops).begin()); - if (starting_vertices) { - auto old_size = (*unique_major_hops).size(); - (*unique_major_hops).resize(old_size + (*starting_vertices).size(), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - (*unique_major_hops).begin() + old_size, - (*unique_major_hops).end(), - int32_t{0}); - } - - auto pair_first = - thrust::make_zip_iterator(unique_majors.begin(), (*unique_major_hops).begin()); - thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_majors.size()); - unique_majors.resize( - thrust::distance(unique_majors.begin(), - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - unique_majors.begin(), - unique_majors.end(), - (*unique_major_hops).begin()))), - handle.get_stream()); - (*unique_major_hops).resize(unique_majors.size(), handle.get_stream()); - } else { - thrust::sort(handle.get_thrust_policy(), unique_majors.begin(), unique_majors.end()); - unique_majors.resize( - thrust::distance( - unique_majors.begin(), - thrust::unique(handle.get_thrust_policy(), unique_majors.begin(), unique_majors.end())), - handle.get_stream()); - } - - rmm::device_uvector<vertex_t> unique_minors(org_edgelist_minors.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - org_edgelist_minors.begin(), - org_edgelist_minors.end(), - unique_minors.begin()); - std::optional<rmm::device_uvector<int32_t>> unique_minor_hops = - org_edgelist_hops ? 
std::make_optional<rmm::device_uvector<int32_t>>( - (*org_edgelist_hops).size(), handle.get_stream()) - : std::nullopt; - if (org_edgelist_hops) { - thrust::copy(handle.get_thrust_policy(), - (*org_edgelist_hops).begin(), - (*org_edgelist_hops).end(), - (*unique_minor_hops).begin()); - - auto pair_first = - thrust::make_zip_iterator(unique_minors.begin(), (*unique_minor_hops).begin()); - thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_minors.size()); - unique_minors.resize( - thrust::distance(unique_minors.begin(), - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - unique_minors.begin(), - unique_minors.end(), - (*unique_minor_hops).begin()))), - handle.get_stream()); - (*unique_minor_hops).resize(unique_minors.size(), handle.get_stream()); - } else { - thrust::sort(handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end()); - unique_minors.resize( - thrust::distance( - unique_minors.begin(), - thrust::unique(handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end())), - handle.get_stream()); - } - - rmm::device_uvector<vertex_t> sorted_org_vertices(renumber_map.size(), handle.get_stream()); - rmm::device_uvector<vertex_t> matching_renumbered_vertices(sorted_org_vertices.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - renumber_map.begin(), - renumber_map.end(), - sorted_org_vertices.begin()); - thrust::sequence(handle.get_thrust_policy(), - matching_renumbered_vertices.begin(), - matching_renumbered_vertices.end(), - vertex_t{0}); - thrust::sort_by_key(handle.get_thrust_policy(), - sorted_org_vertices.begin(), - sorted_org_vertices.end(), - matching_renumbered_vertices.begin()); - - if (org_edgelist_hops) { - rmm::device_uvector<vertex_t> merged_vertices(unique_majors.size() + unique_minors.size(), - handle.get_stream()); - rmm::device_uvector<int32_t> merged_hops(merged_vertices.size(), handle.get_stream()); - rmm::device_uvector<int8_t> merged_flags(merged_vertices.size(), handle.get_stream()); - - auto major_triplet_first = thrust::make_zip_iterator(unique_majors.begin(), - (*unique_major_hops).begin(), - thrust::make_constant_iterator(int8_t{0})); - auto minor_triplet_first = thrust::make_zip_iterator(unique_minors.begin(), - (*unique_minor_hops).begin(), - thrust::make_constant_iterator(int8_t{1})); - thrust::merge(handle.get_thrust_policy(), - major_triplet_first, - major_triplet_first + unique_majors.size(), - minor_triplet_first, - minor_triplet_first + unique_minors.size(), - thrust::make_zip_iterator( - merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); - merged_vertices.resize( - thrust::distance(merged_vertices.begin(), - thrust::get<0>(thrust::unique_by_key( - handle.get_thrust_policy(), - merged_vertices.begin(), - merged_vertices.end(), - thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), - handle.get_stream()); - merged_hops.resize(merged_vertices.size(), handle.get_stream()); - merged_flags.resize(merged_vertices.size(), handle.get_stream()); - - auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - merged_vertices.begin()); - - auto num_unique_keys = thrust::count_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(merged_hops.size()), - cugraph::detail::is_first_in_run_t<decltype(sort_key_first)>{sort_key_first}); - rmm::device_uvector<vertex_t> min_vertices(num_unique_keys, handle.get_stream()); - rmm::device_uvector<vertex_t> 
max_vertices(num_unique_keys, handle.get_stream()); - - auto renumbered_merged_vertex_first = thrust::make_transform_iterator( - merged_vertices.begin(), - cuda::proclaim_return_type<vertex_t>( - [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span<vertex_t const>( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - })); - - thrust::reduce_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - renumbered_merged_vertex_first, - thrust::make_discard_iterator(), - min_vertices.begin(), - thrust::equal_to<thrust::tuple<int32_t, int8_t>>{}, - thrust::minimum<vertex_t>{}); - thrust::reduce_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - renumbered_merged_vertex_first, - thrust::make_discard_iterator(), - max_vertices.begin(), - thrust::equal_to<thrust::tuple<int32_t, int8_t>>{}, - thrust::maximum<vertex_t>{}); - - auto num_violations = thrust::count_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{1}), - thrust::make_counting_iterator(min_vertices.size()), - [min_vertices = raft::device_span<vertex_t const>(min_vertices.data(), min_vertices.size()), - max_vertices = raft::device_span<vertex_t const>(max_vertices.data(), - max_vertices.size())] __device__(size_t i) { - return min_vertices[i] <= max_vertices[i - 1]; - }); - - return (num_violations == 0); - } else { - unique_minors.resize( - thrust::distance( - unique_minors.begin(), - thrust::remove_if(handle.get_thrust_policy(), - unique_minors.begin(), - unique_minors.end(), - [sorted_unique_majors = raft::device_span<vertex_t const>( - unique_majors.data(), unique_majors.size())] __device__(auto minor) { - return thrust::binary_search(thrust::seq, - sorted_unique_majors.begin(), - sorted_unique_majors.end(), - minor); - })), - handle.get_stream()); - - auto max_major_renumbered_vertex = thrust::transform_reduce( - handle.get_thrust_policy(), - unique_majors.begin(), - unique_majors.end(), - cuda::proclaim_return_type<vertex_t>( - [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span<vertex_t const>( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) -> vertex_t { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }), - std::numeric_limits<vertex_t>::lowest(), - thrust::maximum<vertex_t>{}); - - auto min_minor_renumbered_vertex = thrust::transform_reduce( - handle.get_thrust_policy(), - unique_minors.begin(), - unique_minors.end(), - cuda::proclaim_return_type<vertex_t>( - [sorted_org_vertices = raft::device_span<vertex_t const>(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span<vertex_t const>( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t minor) -> vertex_t { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }), - std::numeric_limits<vertex_t>::max(), - thrust::minimum<vertex_t>{}); - - return (max_major_renumbered_vertex < min_minor_renumbered_vertex); - } -} - template <typename input_usecase_t> class 
Tests_SamplingPostProcessing : public ::testing::TestWithParam<std::tuple<SamplingPostProcessing_Usecase, input_usecase_t>> { @@ -450,7 +58,7 @@ class Tests_SamplingPostProcessing { using label_t = int32_t; using weight_t = float; - using edge_id_t = vertex_t; + using edge_id_t = edge_t; using edge_type_t = int32_t; bool constexpr store_transposed = false; @@ -462,6 +70,8 @@ class Tests_SamplingPostProcessing raft::handle_t handle{}; HighResTimer hr_timer{}; + // 1. create a graph + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.start("Construct graph"); @@ -481,6 +91,8 @@ class Tests_SamplingPostProcessing auto edge_weight_view = edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + // 2. seed vertices (& labels) + raft::random::RngState rng_state(0); rmm::device_uvector<vertex_t> starting_vertices( @@ -503,20 +115,22 @@ class Tests_SamplingPostProcessing sampling_post_processing_usecase.num_labels + 1, handle.get_stream()) : std::nullopt; if (starting_vertex_labels) { - thrust::tabulate( - handle.get_thrust_policy(), - (*starting_vertex_labels).begin(), - (*starting_vertex_labels).end(), - [num_seeds_per_label = sampling_post_processing_usecase.num_seeds_per_label] __device__( - size_t i) { return static_cast<label_t>(i / num_seeds_per_label); }); - thrust::tabulate( - handle.get_thrust_policy(), - (*starting_vertex_label_offsets).begin(), - (*starting_vertex_label_offsets).end(), - [num_seeds_per_label = sampling_post_processing_usecase.num_seeds_per_label] __device__( - size_t i) { return num_seeds_per_label * i; }); + auto num_seeds_per_label = sampling_post_processing_usecase.num_seeds_per_label; + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { + cugraph::detail::scalar_fill(handle.get_stream(), + (*starting_vertex_labels).data() + i * num_seeds_per_label, + num_seeds_per_label, + static_cast<label_t>(i)); + } + cugraph::detail::stride_fill(handle.get_stream(), + (*starting_vertex_label_offsets).data(), + (*starting_vertex_label_offsets).size(), + size_t{0}, + num_seeds_per_label); } + // 3. sampling + rmm::device_uvector<vertex_t> org_edgelist_srcs(0, handle.get_stream()); rmm::device_uvector<vertex_t> org_edgelist_dsts(0, handle.get_stream()); std::optional<rmm::device_uvector<weight_t>> org_edgelist_weights{std::nullopt}; @@ -562,6 +176,8 @@ class Tests_SamplingPostProcessing std::swap(org_edgelist_srcs, org_edgelist_dsts); }
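// Editor's sketch (illustrative, not part of the diff): with num_labels = 2 and
// num_seeds_per_label = 3, the scalar_fill/stride_fill calls above produce
//   starting_vertex_labels        = [0, 0, 0, 1, 1, 1]
//   starting_vertex_label_offsets = [0, 3, 6]
// i.e. the same contents the removed thrust::tabulate calls generated, but via two
// purpose-built fills instead of per-element device lambdas.

+ // 4.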
post processing: renumber & sort + { rmm::device_uvector renumbered_and_sorted_edgelist_srcs(org_edgelist_srcs.size(), handle.get_stream()); @@ -652,178 +268,138 @@ class Tests_SamplingPostProcessing if (sampling_post_processing_usecase.check_correctness) { if (renumbered_and_sorted_edgelist_label_hop_offsets) { - ASSERT_TRUE((*renumbered_and_sorted_edgelist_label_hop_offsets).size() == - sampling_post_processing_usecase.num_labels * - sampling_post_processing_usecase.fanouts.size() + - 1) - << "Renumbered and sorted edge list (label,hop) offset array size should coincide with " - "the number of labels * the number of hops + 1."; - - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - (*renumbered_and_sorted_edgelist_label_hop_offsets).begin(), - (*renumbered_and_sorted_edgelist_label_hop_offsets).end())) - << "Renumbered and sorted edge list (label,hop) offset array values should be " - "non-decreasing."; - - ASSERT_TRUE( - (*renumbered_and_sorted_edgelist_label_hop_offsets).back_element(handle.get_stream()) == - renumbered_and_sorted_edgelist_srcs.size()) - << "Renumbered and sorted edge list (label,hop) offset array's last element should " - "coincide with the number of edges."; + ASSERT_TRUE(check_offsets(handle, + raft::device_span( + (*renumbered_and_sorted_edgelist_label_hop_offsets).data(), + (*renumbered_and_sorted_edgelist_label_hop_offsets).size()), + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size(), + renumbered_and_sorted_edgelist_srcs.size())) + << "Renumbered and sorted edge (label, hop) offset array is invalid."; } if (renumbered_and_sorted_renumber_map_label_offsets) { - ASSERT_TRUE((*renumbered_and_sorted_renumber_map_label_offsets).size() == - sampling_post_processing_usecase.num_labels + 1) - << "Renumbered and sorted offset (label, hop) offset array size should coincide with " - "the number of labels + 1."; - - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - (*renumbered_and_sorted_renumber_map_label_offsets).begin(), - (*renumbered_and_sorted_renumber_map_label_offsets).end())) - << "Renumbered and sorted renumber map label offset array values should be " - "non-decreasing."; - - ASSERT_TRUE( - (*renumbered_and_sorted_renumber_map_label_offsets).back_element(handle.get_stream()) == - renumbered_and_sorted_renumber_map.size()) - << "Renumbered and sorted renumber map label offset array's last value should coincide " - "with the renumber map size."; + ASSERT_TRUE(check_offsets(handle, + raft::device_span( + (*renumbered_and_sorted_renumber_map_label_offsets).data(), + (*renumbered_and_sorted_renumber_map_label_offsets).size()), + sampling_post_processing_usecase.num_labels, + renumbered_and_sorted_renumber_map.size())) + << "Renumbered and sorted renumber map label offset array is invalid."; } - for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { - size_t starting_vertex_start_offset = - starting_vertex_label_offsets - ? (*starting_vertex_label_offsets).element(i, handle.get_stream()) - : size_t{0}; - size_t starting_vertex_end_offset = - starting_vertex_label_offsets - ? (*starting_vertex_label_offsets).element(i + 1, handle.get_stream()) - : starting_vertices.size(); + // check whether the edges are properly sorted + + auto renumbered_and_sorted_edgelist_majors = + sampling_post_processing_usecase.src_is_major + ? 
raft::device_span(renumbered_and_sorted_edgelist_srcs.data(), + renumbered_and_sorted_edgelist_srcs.size()) + : raft::device_span(renumbered_and_sorted_edgelist_dsts.data(), + renumbered_and_sorted_edgelist_dsts.size()); + auto renumbered_and_sorted_edgelist_minors = + sampling_post_processing_usecase.src_is_major + ? raft::device_span(renumbered_and_sorted_edgelist_dsts.data(), + renumbered_and_sorted_edgelist_dsts.size()) + : raft::device_span(renumbered_and_sorted_edgelist_srcs.data(), + renumbered_and_sorted_edgelist_srcs.size()); - size_t edgelist_start_offset = - org_edgelist_label_offsets - ? (*org_edgelist_label_offsets).element(i, handle.get_stream()) - : size_t{0}; - size_t edgelist_end_offset = - org_edgelist_label_offsets - ? (*org_edgelist_label_offsets).element(i + 1, handle.get_stream()) - : org_edgelist_srcs.size(); - if (edgelist_start_offset == edgelist_end_offset) continue; - - auto this_label_starting_vertices = raft::device_span( - starting_vertices.data() + starting_vertex_start_offset, - starting_vertex_end_offset - starting_vertex_start_offset); - - auto this_label_org_edgelist_srcs = - raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_org_edgelist_dsts = - raft::device_span(org_edgelist_dsts.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_org_edgelist_hops = - org_edgelist_hops ? std::make_optional>( - (*org_edgelist_hops).data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset) - : std::nullopt; - auto this_label_org_edgelist_weights = - org_edgelist_weights ? std::make_optional>( - (*org_edgelist_weights).data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset) - : std::nullopt; - - auto this_label_output_edgelist_srcs = raft::device_span( - renumbered_and_sorted_edgelist_srcs.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_output_edgelist_dsts = raft::device_span( - renumbered_and_sorted_edgelist_dsts.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_output_edgelist_weights = - renumbered_and_sorted_edgelist_weights - ? std::make_optional>( - (*renumbered_and_sorted_edgelist_weights).data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset) - : std::nullopt; - - size_t renumber_map_start_offset = - renumbered_and_sorted_renumber_map_label_offsets - ? (*renumbered_and_sorted_renumber_map_label_offsets).element(i, handle.get_stream()) - : size_t{0}; - size_t renumber_map_end_offset = renumbered_and_sorted_renumber_map_label_offsets - ? (*renumbered_and_sorted_renumber_map_label_offsets) - .element(i + 1, handle.get_stream()) - : renumbered_and_sorted_renumber_map.size(); - auto this_label_output_renumber_map = raft::device_span( - renumbered_and_sorted_renumber_map.data() + renumber_map_start_offset, - renumber_map_end_offset - renumber_map_start_offset); - - // check whether the edges are properly sorted - - auto this_label_output_edgelist_majors = sampling_post_processing_usecase.src_is_major - ? this_label_output_edgelist_srcs - : this_label_output_edgelist_dsts; - auto this_label_output_edgelist_minors = sampling_post_processing_usecase.src_is_major - ? 
this_label_output_edgelist_dsts - : this_label_output_edgelist_srcs; - - if (this_label_org_edgelist_hops) { - auto num_hops = sampling_post_processing_usecase.fanouts.size(); - auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), - this_label_output_edgelist_minors.begin()); - for (size_t j = 0; j < num_hops; ++j) { - auto hop_start_offset = (*renumbered_and_sorted_edgelist_label_hop_offsets) - .element(i * num_hops + j, handle.get_stream()) - - (*renumbered_and_sorted_edgelist_label_hop_offsets) - .element(i * num_hops, handle.get_stream()); - auto hop_end_offset = (*renumbered_and_sorted_edgelist_label_hop_offsets) - .element(i * num_hops + j + 1, handle.get_stream()) - - (*renumbered_and_sorted_edgelist_label_hop_offsets) - .element(i * num_hops, handle.get_stream()); - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - edge_first + hop_start_offset, - edge_first + hop_end_offset)) - << "Renumbered and sorted output edges are not properly sorted."; - } - } else { - auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), - this_label_output_edgelist_minors.begin()); - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - edge_first, - edge_first + this_label_output_edgelist_majors.size())) - << "Renumbered and sorted output edges are not properly sorted."; + if (renumbered_and_sorted_edgelist_label_hop_offsets) { + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size(); + ++i) { + auto hop_start_offset = + (*renumbered_and_sorted_edgelist_label_hop_offsets).element(i, handle.get_stream()); + auto hop_end_offset = (*renumbered_and_sorted_edgelist_label_hop_offsets) + .element(i + 1, handle.get_stream()); + ASSERT_TRUE(check_edgelist_is_sorted( + handle, + raft::device_span( + renumbered_and_sorted_edgelist_majors.data() + hop_start_offset, + hop_end_offset - hop_start_offset), + raft::device_span( + renumbered_and_sorted_edgelist_minors.data() + hop_start_offset, + hop_end_offset - hop_start_offset))) + << "Renumbered and sorted edge list is not properly sorted."; } + } else { + ASSERT_TRUE(check_edgelist_is_sorted( + handle, + raft::device_span(renumbered_and_sorted_edgelist_majors.data(), + renumbered_and_sorted_edgelist_majors.size()), + raft::device_span(renumbered_and_sorted_edgelist_minors.data(), + renumbered_and_sorted_edgelist_minors.size()))) + << "Renumbered and sorted edge list is not properly sorted."; + } - // check whether renumbering recovers the original edge list - - ASSERT_TRUE(compare_edgelist(handle, - this_label_org_edgelist_srcs, - this_label_org_edgelist_dsts, - this_label_org_edgelist_weights, - this_label_output_edgelist_srcs, - this_label_output_edgelist_dsts, - this_label_output_edgelist_weights, - std::make_optional(this_label_output_renumber_map))) - << "Unrenumbering the renumbered and sorted edge list does not recover the original " - "edgelist."; + ASSERT_TRUE(compare_edgelist( + handle, + raft::device_span(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).data(), (*org_edgelist_weights).size()) + : std::nullopt, + org_edgelist_label_offsets + ? 
std::make_optional>( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size()) + : std::nullopt, + raft::device_span(renumbered_and_sorted_edgelist_srcs.data(), + renumbered_and_sorted_edgelist_srcs.size()), + raft::device_span(renumbered_and_sorted_edgelist_dsts.data(), + renumbered_and_sorted_edgelist_dsts.size()), + renumbered_and_sorted_edgelist_weights + ? std::make_optional>( + (*renumbered_and_sorted_edgelist_weights).data(), + (*renumbered_and_sorted_edgelist_weights).size()) + : std::nullopt, + std::make_optional>( + renumbered_and_sorted_renumber_map.data(), renumbered_and_sorted_renumber_map.size()), + renumbered_and_sorted_renumber_map_label_offsets + ? std::make_optional>( + (*renumbered_and_sorted_renumber_map_label_offsets).data(), + (*renumbered_and_sorted_renumber_map_label_offsets).size()) + : std::nullopt, + sampling_post_processing_usecase.num_labels)) + << "Unrenumbering the renumbered and sorted edge list does not recover the original " + "edgelist."; - // Check the invariants in renumber_map + // Check the invariants in renumber_map - ASSERT_TRUE(check_renumber_map_invariants( - handle, - sampling_post_processing_usecase.renumber_with_seeds - ? std::make_optional>( - this_label_starting_vertices.data(), this_label_starting_vertices.size()) - : std::nullopt, - this_label_org_edgelist_srcs, - this_label_org_edgelist_dsts, - this_label_org_edgelist_hops, - this_label_output_renumber_map, - sampling_post_processing_usecase.src_is_major)) - << "Renumbered and sorted output renumber map violates invariants."; - } + ASSERT_TRUE(check_vertex_renumber_map_invariants( + handle, + sampling_post_processing_usecase.renumber_with_seeds + ? std::make_optional>(starting_vertices.data(), + starting_vertices.size()) + : std::nullopt, + (sampling_post_processing_usecase.renumber_with_seeds && starting_vertex_label_offsets) + ? std::make_optional>( + (*starting_vertex_label_offsets).data(), (*starting_vertex_label_offsets).size()) + : std::nullopt, + raft::device_span(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_hops ? std::make_optional>( + (*org_edgelist_hops).data(), (*org_edgelist_hops).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional>( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size()) + : std::nullopt, + raft::device_span(renumbered_and_sorted_renumber_map.data(), + renumbered_and_sorted_renumber_map.size()), + renumbered_and_sorted_renumber_map_label_offsets + ? std::make_optional>( + (*renumbered_and_sorted_renumber_map_label_offsets).data(), + (*renumbered_and_sorted_renumber_map_label_offsets).size()) + : std::nullopt, + std::nullopt, + sampling_post_processing_usecase.num_labels, + 1, + sampling_post_processing_usecase.src_is_major)) + << "Renumbered and sorted output renumber map violates invariants."; } } + // 5. 
post processing: renumber & compress + { rmm::device_uvector renumbered_and_compressed_edgelist_srcs( org_edgelist_srcs.size(), handle.get_stream()); @@ -921,126 +497,52 @@ class Tests_SamplingPostProcessing } if (sampling_post_processing_usecase.check_correctness) { - if (renumbered_and_compressed_nzd_vertices) { - ASSERT_TRUE(renumbered_and_compressed_offsets.size() == - (*renumbered_and_compressed_nzd_vertices).size() + 1) - << "Renumbered and compressed offset array size should coincide with the number of " - "non-zero-degree vertices + 1."; - } - - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - renumbered_and_compressed_offsets.begin(), - renumbered_and_compressed_offsets.end())) - << "Renumbered and compressed offset array values should be non-decreasing."; - - ASSERT_TRUE(renumbered_and_compressed_offsets.back_element(handle.get_stream()) == - renumbered_and_compressed_edgelist_minors.size()) - << "Renumbered and compressed offset array's last value should coincide with the number " - "of " - "edges."; + ASSERT_TRUE(check_offsets( + handle, + raft::device_span(renumbered_and_compressed_offsets.data(), + renumbered_and_compressed_offsets.size()), + renumbered_and_compressed_nzd_vertices ? (*renumbered_and_compressed_nzd_vertices).size() + : renumbered_and_compressed_offsets.size() - 1, + renumbered_and_compressed_edgelist_minors.size())) + << "Renumbered and compressed offset array is invalid"; if (renumbered_and_compressed_offset_label_hop_offsets) { - ASSERT_TRUE((*renumbered_and_compressed_offset_label_hop_offsets).size() == - sampling_post_processing_usecase.num_labels * - sampling_post_processing_usecase.fanouts.size() + - 1) - << "Renumbered and compressed offset (label,hop) offset array size should coincide " - "with " - "the number of labels * the number of hops + 1."; - - ASSERT_TRUE( - thrust::is_sorted(handle.get_thrust_policy(), - (*renumbered_and_compressed_offset_label_hop_offsets).begin(), - (*renumbered_and_compressed_offset_label_hop_offsets).end())) - << "Renumbered and compressed offset (label,hop) offset array values should be " - "non-decreasing."; - - ASSERT_TRUE((*renumbered_and_compressed_offset_label_hop_offsets) - .back_element(handle.get_stream()) == - renumbered_and_compressed_offsets.size() - 1) - << "Renumbered and compressed offset (label,hop) offset array's last value should " - "coincide with the offset array size - 1."; + ASSERT_TRUE(check_offsets(handle, + raft::device_span( + (*renumbered_and_compressed_offset_label_hop_offsets).data(), + (*renumbered_and_compressed_offset_label_hop_offsets).size()), + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size(), + renumbered_and_compressed_offsets.size() - 1)) + << "Renumbered and compressed offset (label, hop) offset array is invalid"; } if (renumbered_and_compressed_renumber_map_label_offsets) { - ASSERT_TRUE((*renumbered_and_compressed_renumber_map_label_offsets).size() == - sampling_post_processing_usecase.num_labels + 1) - << "Renumbered and compressed offset (label, hop) offset array size should coincide " - "with " - "the number of labels + 1."; - ASSERT_TRUE( - thrust::is_sorted(handle.get_thrust_policy(), - (*renumbered_and_compressed_renumber_map_label_offsets).begin(), - (*renumbered_and_compressed_renumber_map_label_offsets).end())) - << "Renumbered and compressed renumber map label offset array values should be " - "non-decreasing."; - - ASSERT_TRUE((*renumbered_and_compressed_renumber_map_label_offsets) - 
.back_element(handle.get_stream()) == - renumbered_and_compressed_renumber_map.size()) - << "Renumbered and compressed renumber map label offset array's last value should " - "coincide with the renumber map size."; + check_offsets(handle, + raft::device_span( + (*renumbered_and_compressed_renumber_map_label_offsets).data(), + (*renumbered_and_compressed_renumber_map_label_offsets).size()), + sampling_post_processing_usecase.num_labels, + renumbered_and_compressed_renumber_map.size())) + << "Renumbered and compressed renumber map label offset array is invalid"; } - for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { - size_t starting_vertex_start_offset = - starting_vertex_label_offsets - ? (*starting_vertex_label_offsets).element(i, handle.get_stream()) - : size_t{0}; - size_t starting_vertex_end_offset = - starting_vertex_label_offsets - ? (*starting_vertex_label_offsets).element(i + 1, handle.get_stream()) - : starting_vertices.size(); - - size_t edgelist_start_offset = - org_edgelist_label_offsets - ? (*org_edgelist_label_offsets).element(i, handle.get_stream()) - : size_t{0}; - size_t edgelist_end_offset = - org_edgelist_label_offsets - ? (*org_edgelist_label_offsets).element(i + 1, handle.get_stream()) - : org_edgelist_srcs.size(); - if (edgelist_start_offset == edgelist_end_offset) continue; - - auto this_label_starting_vertices = raft::device_span( - starting_vertices.data() + starting_vertex_start_offset, - starting_vertex_end_offset - starting_vertex_start_offset); - - auto this_label_org_edgelist_srcs = - raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_org_edgelist_dsts = - raft::device_span(org_edgelist_dsts.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_org_edgelist_hops = - org_edgelist_hops ? std::make_optional>( - (*org_edgelist_hops).data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset) - : std::nullopt; - auto this_label_org_edgelist_weights = - org_edgelist_weights ? std::make_optional>( - (*org_edgelist_weights).data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset) - : std::nullopt; - - rmm::device_uvector this_label_output_edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector this_label_output_edgelist_dsts(0, handle.get_stream()); - auto this_label_output_edgelist_weights = - renumbered_and_compressed_edgelist_weights - ? std::make_optional>(0, handle.get_stream()) - : std::nullopt; - this_label_output_edgelist_srcs.reserve(edgelist_end_offset - edgelist_start_offset, - handle.get_stream()); - this_label_output_edgelist_dsts.reserve(edgelist_end_offset - edgelist_start_offset, - handle.get_stream()); - if (this_label_output_edgelist_weights) { - (*this_label_output_edgelist_weights) - .reserve(edgelist_end_offset - edgelist_start_offset, handle.get_stream()); - } - - // decompress + // check whether renumbering recovers the original edge list + + rmm::device_uvector output_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector output_edgelist_dsts(0, handle.get_stream()); + auto output_edgelist_weights = + renumbered_and_compressed_edgelist_weights + ? 
std::make_optional>(0, handle.get_stream()) + : std::nullopt; + output_edgelist_srcs.reserve(org_edgelist_srcs.size(), handle.get_stream()); + output_edgelist_dsts.reserve(org_edgelist_srcs.capacity(), handle.get_stream()); + if (output_edgelist_weights) { + (*output_edgelist_weights).reserve(org_edgelist_srcs.capacity(), handle.get_stream()); + } + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { auto num_hops = sampling_post_processing_usecase.fanouts.size(); for (size_t j = 0; j < num_hops; ++j) { auto offset_start_offset = renumbered_and_compressed_offset_label_hop_offsets @@ -1069,108 +571,123 @@ class Tests_SamplingPostProcessing h_offsets.data(), d_offsets.data(), h_offsets.size(), handle.get_stream()); handle.sync_stream(); - auto old_size = this_label_output_edgelist_srcs.size(); - this_label_output_edgelist_srcs.resize(old_size + (h_offsets.back() - h_offsets[0]), - handle.get_stream()); - this_label_output_edgelist_dsts.resize(this_label_output_edgelist_srcs.size(), - handle.get_stream()); - if (this_label_output_edgelist_weights) { - (*this_label_output_edgelist_weights) - .resize(this_label_output_edgelist_srcs.size(), handle.get_stream()); + auto old_size = output_edgelist_srcs.size(); + output_edgelist_srcs.resize(old_size + (h_offsets.back() - h_offsets[0]), + handle.get_stream()); + output_edgelist_dsts.resize(output_edgelist_srcs.size(), handle.get_stream()); + if (output_edgelist_weights) { + (*output_edgelist_weights).resize(output_edgelist_srcs.size(), handle.get_stream()); + } + if (renumbered_and_compressed_nzd_vertices) { + cugraph::test::expand_hypersparse_offsets( + handle, + raft::device_span(d_offsets.data(), d_offsets.size()), + raft::device_span( + (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, + (offset_end_offset - offset_start_offset) - 1), + raft::device_span( + (sampling_post_processing_usecase.src_is_major ? output_edgelist_srcs.data() + : output_edgelist_dsts.data()) + + old_size, + h_offsets.back() - h_offsets[0]), + h_offsets[0]); + } else { + cugraph::test::expand_sparse_offsets( + handle, + raft::device_span(d_offsets.data(), d_offsets.size()), + raft::device_span( + (sampling_post_processing_usecase.src_is_major ? output_edgelist_srcs.data() + : output_edgelist_dsts.data()) + + old_size, + h_offsets.back() - h_offsets[0]), + h_offsets[0], + base_v); } - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(h_offsets[0]), - thrust::make_counting_iterator(h_offsets.back()), - (sampling_post_processing_usecase.src_is_major - ? this_label_output_edgelist_srcs.begin() - : this_label_output_edgelist_dsts.begin()) + + raft::copy( + (sampling_post_processing_usecase.src_is_major ? output_edgelist_dsts.begin() + : output_edgelist_srcs.begin()) + old_size, - cuda::proclaim_return_type( - [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), - nzd_vertices = - renumbered_and_compressed_nzd_vertices - ? 
thrust::make_optional>( - (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, - (offset_end_offset - offset_start_offset) - 1) - : thrust::nullopt, - base_v] __device__(size_t i) { - auto idx = static_cast(thrust::distance( - offsets.begin() + 1, - thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); - if (nzd_vertices) { - return (*nzd_vertices)[idx]; - } else { - return base_v + static_cast(idx); - } - })); - thrust::copy(handle.get_thrust_policy(), - renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], - renumbered_and_compressed_edgelist_minors.begin() + h_offsets.back(), - (sampling_post_processing_usecase.src_is_major - ? this_label_output_edgelist_dsts.begin() - : this_label_output_edgelist_srcs.begin()) + - old_size); - if (this_label_output_edgelist_weights) { - thrust::copy(handle.get_thrust_policy(), - (*renumbered_and_compressed_edgelist_weights).begin() + h_offsets[0], - (*renumbered_and_compressed_edgelist_weights).begin() + h_offsets.back(), - (*this_label_output_edgelist_weights).begin() + old_size); + renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], + h_offsets.back() - h_offsets[0], + handle.get_stream()); + if (output_edgelist_weights) { + raft::copy((*output_edgelist_weights).begin() + old_size, + (*renumbered_and_compressed_edgelist_weights).begin() + h_offsets[0], + h_offsets.back() - h_offsets[0], + handle.get_stream()); } } - - size_t renumber_map_start_offset = - renumbered_and_compressed_renumber_map_label_offsets - ? (*renumbered_and_compressed_renumber_map_label_offsets) - .element(i, handle.get_stream()) - : size_t{0}; - size_t renumber_map_end_offset = - renumbered_and_compressed_renumber_map_label_offsets - ? (*renumbered_and_compressed_renumber_map_label_offsets) - .element(i + 1, handle.get_stream()) - : renumbered_and_compressed_renumber_map.size(); - auto this_label_output_renumber_map = raft::device_span( - renumbered_and_compressed_renumber_map.data() + renumber_map_start_offset, - renumber_map_end_offset - renumber_map_start_offset); - - // check whether renumbering recovers the original edge list - - ASSERT_TRUE(compare_edgelist( - handle, - this_label_org_edgelist_srcs, - this_label_org_edgelist_dsts, - this_label_org_edgelist_weights, - raft::device_span(this_label_output_edgelist_srcs.data(), - this_label_output_edgelist_srcs.size()), - raft::device_span(this_label_output_edgelist_dsts.data(), - this_label_output_edgelist_dsts.size()), - this_label_output_edgelist_weights - ? std::make_optional>( - (*this_label_output_edgelist_weights).data(), - (*this_label_output_edgelist_weights).size()) - : std::nullopt, - std::make_optional(this_label_output_renumber_map))) - << "Unrenumbering the renumbered and sorted edge list does not recover the original " - "edgelist."; - - // Check the invariants in renumber_map - - ASSERT_TRUE(check_renumber_map_invariants( - handle, - sampling_post_processing_usecase.renumber_with_seeds - ? 
std::make_optional>( - this_label_starting_vertices.data(), this_label_starting_vertices.size()) - : std::nullopt, - this_label_org_edgelist_srcs, - this_label_org_edgelist_dsts, - this_label_org_edgelist_hops, - this_label_output_renumber_map, - sampling_post_processing_usecase.src_is_major)) - << "Renumbered and sorted output renumber map violates invariants."; } + + ASSERT_TRUE(compare_edgelist( + handle, + raft::device_span(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).data(), (*org_edgelist_weights).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional(raft::device_span( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size())) + : std::nullopt, + raft::device_span(output_edgelist_srcs.data(), + output_edgelist_srcs.size()), + raft::device_span(output_edgelist_dsts.data(), + output_edgelist_dsts.size()), + output_edgelist_weights + ? std::make_optional>( + (*output_edgelist_weights).data(), (*output_edgelist_weights).size()) + : std::nullopt, + std::make_optional>( + renumbered_and_compressed_renumber_map.data(), + renumbered_and_compressed_renumber_map.size()), + renumbered_and_compressed_renumber_map_label_offsets + ? std::make_optional>( + (*renumbered_and_compressed_renumber_map_label_offsets).data(), + (*renumbered_and_compressed_renumber_map_label_offsets).size()) + : std::nullopt, + sampling_post_processing_usecase.num_labels)) + << "Unrenumbering the renumbered and sorted edge list does not recover the original " + "edgelist."; + + // Check the invariants in renumber_map + + ASSERT_TRUE(check_vertex_renumber_map_invariants( + handle, + sampling_post_processing_usecase.renumber_with_seeds + ? std::make_optional>(starting_vertices.data(), + starting_vertices.size()) + : std::nullopt, + (sampling_post_processing_usecase.renumber_with_seeds && starting_vertex_label_offsets) + ? std::make_optional>( + (*starting_vertex_label_offsets).data(), (*starting_vertex_label_offsets).size()) + : std::nullopt, + raft::device_span(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_hops ? std::make_optional>( + (*org_edgelist_hops).data(), (*org_edgelist_hops).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional(raft::device_span( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size())) + : std::nullopt, + raft::device_span(renumbered_and_compressed_renumber_map.data(), + renumbered_and_compressed_renumber_map.size()), + renumbered_and_compressed_renumber_map_label_offsets + ? std::make_optional>( + (*renumbered_and_compressed_renumber_map_label_offsets).data(), + (*renumbered_and_compressed_renumber_map_label_offsets).size()) + : std::nullopt, + std::nullopt, + sampling_post_processing_usecase.num_labels, + 1, + sampling_post_processing_usecase.src_is_major)) + << "Renumbered and sorted output renumber map violates invariants."; } } + // 6. 
post processing: sort only + { rmm::device_uvector sorted_edgelist_srcs(org_edgelist_srcs.size(), handle.get_stream()); @@ -1245,25 +762,42 @@ class Tests_SamplingPostProcessing if (sampling_post_processing_usecase.check_correctness) { if (sorted_edgelist_label_hop_offsets) { - ASSERT_TRUE((*sorted_edgelist_label_hop_offsets).size() == - sampling_post_processing_usecase.num_labels * - sampling_post_processing_usecase.fanouts.size() + - 1) - << "Sorted edge list (label,hop) offset array size should coincide with " - "the number of labels * the number of hops + 1."; - - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - (*sorted_edgelist_label_hop_offsets).begin(), - (*sorted_edgelist_label_hop_offsets).end())) - << "Sorted edge list (label,hop) offset array values should be " - "non-decreasing."; - - ASSERT_TRUE((*sorted_edgelist_label_hop_offsets).back_element(handle.get_stream()) == - sorted_edgelist_srcs.size()) - << "Sorted edge list (label,hop) offset array's last element should coincide with the " - "number of edges."; + ASSERT_TRUE(check_offsets( + handle, + raft::device_span((*sorted_edgelist_label_hop_offsets).data(), + (*sorted_edgelist_label_hop_offsets).size()), + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size(), + sorted_edgelist_srcs.size())) + << "Sorted edge list (label, hop) offset array is invalid."; } + // check whether renumbering recovers the original edge list + + ASSERT_TRUE(compare_edgelist( + handle, + raft::device_span(org_edgelist_srcs.data(), org_edgelist_srcs.size()), + raft::device_span(org_edgelist_dsts.data(), org_edgelist_dsts.size()), + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).data(), (*org_edgelist_weights).size()) + : std::nullopt, + org_edgelist_label_offsets + ? std::make_optional(raft::device_span( + (*org_edgelist_label_offsets).data(), (*org_edgelist_label_offsets).size())) + : std::nullopt, + raft::device_span(sorted_edgelist_srcs.data(), + sorted_edgelist_srcs.size()), + raft::device_span(sorted_edgelist_dsts.data(), + sorted_edgelist_dsts.size()), + sorted_edgelist_weights + ? 
std::make_optional>( + (*sorted_edgelist_weights).data(), (*sorted_edgelist_weights).size()) + : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + sampling_post_processing_usecase.num_labels)) + << "Sorted edge list does not coincide with the original edgelist."; + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { size_t edgelist_start_offset = org_edgelist_label_offsets @@ -1314,9 +848,7 @@ class Tests_SamplingPostProcessing : this_label_output_edgelist_srcs; if (this_label_org_edgelist_hops) { - auto num_hops = sampling_post_processing_usecase.fanouts.size(); - auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), - this_label_output_edgelist_minors.begin()); + auto num_hops = sampling_post_processing_usecase.fanouts.size(); for (size_t j = 0; j < num_hops; ++j) { auto hop_start_offset = (*sorted_edgelist_label_hop_offsets) @@ -1326,32 +858,25 @@ class Tests_SamplingPostProcessing (*sorted_edgelist_label_hop_offsets) .element(i * num_hops + j + 1, handle.get_stream()) - (*sorted_edgelist_label_hop_offsets).element(i * num_hops, handle.get_stream()); - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - edge_first + hop_start_offset, - edge_first + hop_end_offset)) - << "Renumbered and sorted output edges are not properly sorted."; + ASSERT_TRUE(check_edgelist_is_sorted( + handle, + raft::device_span( + this_label_output_edgelist_majors.data() + hop_start_offset, + hop_end_offset - hop_start_offset), + raft::device_span( + this_label_output_edgelist_minors.data() + hop_start_offset, + hop_end_offset - hop_start_offset))) + << "Sorted edge list is not properly sorted."; } } else { - auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), - this_label_output_edgelist_minors.begin()); - ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), - edge_first, - edge_first + this_label_output_edgelist_majors.size())) - << "Renumbered and sorted output edges are not properly sorted."; + ASSERT_TRUE(check_edgelist_is_sorted( + handle, + raft::device_span(this_label_output_edgelist_majors.data(), + this_label_output_edgelist_majors.size()), + raft::device_span(this_label_output_edgelist_minors.data(), + this_label_output_edgelist_minors.size()))) + << "Sorted edge list is not properly sorted."; } - - // check whether renumbering recovers the original edge list - - ASSERT_TRUE( - compare_edgelist(handle, - this_label_org_edgelist_srcs, - this_label_org_edgelist_dsts, - this_label_org_edgelist_weights, - this_label_output_edgelist_srcs, - this_label_output_edgelist_dsts, - this_label_output_edgelist_weights, - std::optional>{std::nullopt})) - << "Sorted edge list does not coincide with the original edgelist."; } } } diff --git a/cpp/tests/sampling/sg_random_walks_test.cpp b/cpp/tests/sampling/sg_random_walks_test.cpp index 7409c2ab758..4bcfebc6d51 100644 --- a/cpp/tests/sampling/sg_random_walks_test.cpp +++ b/cpp/tests/sampling/sg_random_walks_test.cpp @@ -40,8 +40,10 @@ struct UniformRandomWalks_Usecase { raft::device_span start_vertices, size_t num_paths) { + raft::random::RngState rng_state(0); + return cugraph::uniform_random_walks( - handle, graph_view, edge_weight_view, start_vertices, num_paths, seed); + handle, rng_state, graph_view, edge_weight_view, start_vertices, num_paths); } bool expect_throw() { return false; } @@ -62,12 +64,13 @@ struct BiasedRandomWalks_Usecase { { CUGRAPH_EXPECTS(edge_weight_view.has_value(), "Biased random walk requires edge weights."); + 
raft::random::RngState rng_state(0); + return cugraph::biased_random_walks( - handle, graph_view, *edge_weight_view, start_vertices, num_paths, seed); + handle, rng_state, graph_view, *edge_weight_view, start_vertices, num_paths); } - // FIXME: Not currently implemented - bool expect_throw() { return true; } + bool expect_throw() { return !test_weighted; } }; struct Node2VecRandomWalks_Usecase { @@ -85,18 +88,19 @@ struct Node2VecRandomWalks_Usecase { raft::device_span start_vertices, size_t num_paths) { + raft::random::RngState rng_state(0); + return cugraph::node2vec_random_walks(handle, + rng_state, graph_view, edge_weight_view, start_vertices, num_paths, static_cast(p), - static_cast(q), - seed); + static_cast(q)); } - // FIXME: Not currently implemented - bool expect_throw() { return true; } + bool expect_throw() { return false; } }; template @@ -197,9 +201,6 @@ using Tests_Node2VecRandomWalks_File = using Tests_Node2VecRandomWalks_Rmat = Tests_RandomWalks>; -#if 0 -// FIXME: We should use these tests, gtest-1.11.0 makes it a runtime error -// to define and not instantiate these. TEST_P(Tests_UniformRandomWalks_File, Initialize_i32_i32_f) { run_current_test( @@ -211,7 +212,6 @@ TEST_P(Tests_UniformRandomWalks_Rmat, Initialize_i32_i32_f) run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -#endif TEST_P(Tests_BiasedRandomWalks_File, Initialize_i32_i32_f) { @@ -237,19 +237,12 @@ TEST_P(Tests_Node2VecRandomWalks_Rmat, Initialize_i32_i32_f) override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -#if 0 -// FIXME: Not sure why these are failing, but we're refactoring anyway. INSTANTIATE_TEST_SUITE_P( simple_test, Tests_UniformRandomWalks_File, - ::testing::Combine( - ::testing::Values(UniformRandomWalks_Usecase{false, 0, true}, - UniformRandomWalks_Usecase{true, 0, true}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), - cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), - cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); -#endif + ::testing::Combine(::testing::Values(UniformRandomWalks_Usecase{false, 0, true}, + UniformRandomWalks_Usecase{true, 0, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( file_test, @@ -265,6 +258,16 @@ INSTANTIATE_TEST_SUITE_P( Node2VecRandomWalks_Usecase{4, 8, true, 0, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); +INSTANTIATE_TEST_SUITE_P( + file_large_test, + Tests_UniformRandomWalks_File, + ::testing::Combine( + ::testing::Values(UniformRandomWalks_Usecase{false, 0, true}, + UniformRandomWalks_Usecase{true, 0, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + INSTANTIATE_TEST_SUITE_P( file_large_test, Tests_BiasedRandomWalks_File, @@ -285,23 +288,20 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); -#if 0 -// FIXME: Not sure why these are failing, but we're refactoring anyway. 
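// Editor's sketch (illustrative, not part of this diff): as the updated wrappers above
// show, the random walk entry points now take a caller-owned raft::random::RngState in
// place of a seed argument, e.g.
//
//   raft::random::RngState rng_state(0);
//   auto [vertex_paths, edge_weight_paths] = cugraph::uniform_random_walks(
//     handle, rng_state, graph_view, edge_weight_view, start_vertices, num_paths);
//
// (The structured-binding return shape is an assumption for illustration.) Passing the
// RngState by reference lets repeated calls advance a single random stream, keeping a
// test run reproducible for a fixed initial seed.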
INSTANTIATE_TEST_SUITE_P( rmat_small_test, Tests_UniformRandomWalks_Rmat, - ::testing::Combine(::testing::Values(UniformRandomWalks_Usecase{false, 0, true}, - UniformRandomWalks_Usecase{true, 0, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + ::testing::Combine( + ::testing::Values(UniformRandomWalks_Usecase{false, 0, true}, + UniformRandomWalks_Usecase{true, 0, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, Tests_UniformRandomWalks_Rmat, - ::testing::Combine(::testing::Values(UniformRandomWalks_Usecase{true, 0, false}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 20, 32, 0.57, 0.19, 0.19, 0, false, false)))); -#endif + ::testing::Combine( + ::testing::Values(UniformRandomWalks_Usecase{true, 0, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( rmat_small_test, diff --git a/cpp/tests/utilities/property_generator_utilities.hpp b/cpp/tests/utilities/property_generator_utilities.hpp index 6bd22da1f75..f907501cc7c 100644 --- a/cpp/tests/utilities/property_generator_utilities.hpp +++ b/cpp/tests/utilities/property_generator_utilities.hpp @@ -34,6 +34,7 @@ template <typename GraphViewType, typename property_t> struct generate { private: using vertex_type = typename GraphViewType::vertex_type; + using edge_type_t = int32_t; using property_buffer_type = std::decay_t<decltype(cugraph::allocate_dataframe_buffer<property_t>( size_t{0}, rmm::cuda_stream_view{}))>; @@ -62,6 +63,28 @@ struct generate { static cugraph::edge_property_t<GraphViewType, property_t> edge_property( raft::handle_t const& handle, GraphViewType const& graph_view, int32_t hash_bin_count); + + static cugraph::edge_property_t<GraphViewType, property_t> edge_property_by_src_dst_types( + raft::handle_t const& handle, + GraphViewType const& graph_view, + raft::device_span<vertex_type const> vertex_type_offsets, + int32_t hash_bin_count); + + // generate unique edge property values (in [0, # edges in the graph)); if property_t is an + // integral type, this function requires std::numeric_limits<property_t>::max() to be no smaller + // than the number of edges in the input graph. + static cugraph::edge_property_t<GraphViewType, property_t> unique_edge_property( + raft::handle_t const& handle, GraphViewType const& graph_view); + + // generate unique (edge property value, edge type) pairs; if property_t is an integral type, edge + // property values for each type are consecutive integers starting from 0, and this function + // requires std::numeric_limits<property_t>::max() to be no smaller than the number of edges in + // the input graph.
+ static cugraph::edge_property_t unique_edge_property_per_type( + raft::handle_t const& handle, + GraphViewType const& graph_view, + cugraph::edge_property_view_t edge_type_view, + int32_t num_edge_types); }; } // namespace test diff --git a/cpp/tests/utilities/property_generator_utilities_impl.cuh b/cpp/tests/utilities/property_generator_utilities_impl.cuh index a46009f95e3..61a861b6670 100644 --- a/cpp/tests/utilities/property_generator_utilities_impl.cuh +++ b/cpp/tests/utilities/property_generator_utilities_impl.cuh @@ -26,6 +26,7 @@ #include +#include #include #include @@ -127,5 +128,102 @@ generate::edge_property(raft::handle_t const& handle, return output_property; } +template +cugraph::edge_property_t +generate::edge_property_by_src_dst_types( + raft::handle_t const& handle, + GraphViewType const& graph_view, + raft::device_span vertex_type_offsets, + int32_t hash_bin_count) +{ + auto output_property = cugraph::edge_property_t(handle, graph_view); + + cugraph::transform_e( + handle, + graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [vertex_type_offsets, hash_bin_count] __device__(auto src, auto dst, auto, auto, auto) { + auto src_v_type = thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, vertex_type_offsets.begin() + 1, vertex_type_offsets.end(), src)); + auto dst_v_type = thrust::distance( + vertex_type_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, vertex_type_offsets.begin() + 1, vertex_type_offsets.end(), dst)); + auto num_v_types = vertex_type_offsets.size() - 1; + return detail::make_property_value((src_v_type * num_v_types + dst_v_type) % + hash_bin_count); + }, + output_property.mutable_view()); + + return output_property; +} + +template +cugraph::edge_property_t +generate::unique_edge_property(raft::handle_t const& handle, + GraphViewType const& graph_view) +{ + auto output_property = cugraph::edge_property_t(handle, graph_view); + if constexpr (std::is_integral_v && !std::is_same_v) { + CUGRAPH_EXPECTS( + graph_view.compute_number_of_edges(handle) <= std::numeric_limits::max(), + "std::numeric_limits::max() is smaller than the number of edges."); + rmm::device_scalar counter(property_t{0}, handle.get_stream()); + cugraph::transform_e( + handle, + graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [counter = counter.data()] __device__(auto, auto, auto, auto, auto) { + cuda::atomic_ref atomic_counter(*counter); + return atomic_counter.fetch_add(property_t{1}, cuda::std::memory_order_relaxed); + }, + output_property.mutable_view()); + if constexpr (GraphViewType::is_multi_gpu) { CUGRAPH_FAIL("unimplemented."); } + } else { + CUGRAPH_FAIL("unimplemented."); + } + return output_property; +} + +template +cugraph::edge_property_t +generate::unique_edge_property_per_type( + raft::handle_t const& handle, + GraphViewType const& graph_view, + cugraph::edge_property_view_t edge_type_view, + int32_t num_edge_types) +{ + auto output_property = cugraph::edge_property_t(handle, graph_view); + if constexpr (std::is_integral_v && !std::is_same_v) { + CUGRAPH_EXPECTS( + graph_view.compute_number_of_edges(handle) <= std::numeric_limits::max(), + "std::numeric_limits::max() is smaller than the number of edges."); + rmm::device_uvector counters(num_edge_types, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), counters.begin(), 
counters.end(), property_t{0}); + cugraph::transform_e( + handle, + graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_type_view, + [counters = raft::device_span(counters.data(), counters.size())] __device__( + auto, auto, auto, auto, int32_t edge_type) { + cuda::atomic_ref atomic_counter(counters[edge_type]); + return atomic_counter.fetch_add(property_t{1}, cuda::std::memory_order_relaxed); + }, + output_property.mutable_view()); + if constexpr (GraphViewType::is_multi_gpu) { CUGRAPH_FAIL("unimplemented."); } + } else { + CUGRAPH_FAIL("unimplemented."); + } + return output_property; +} + } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index 8d26ac1f2fe..ef1c4f831eb 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -16,11 +16,15 @@ #include "utilities/thrust_wrapper.hpp" +#include +#include + #include #include #include #include +#include #include #include #include @@ -477,5 +481,70 @@ template void populate_vertex_ids(raft::handle_t const& handle, rmm::device_uvector& d_vertices_v, int64_t vertex_id_offset); +template +void expand_sparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span indices, + offset_t base_offset, + idx_t base_idx) +{ + rmm::device_uvector tmp_offsets(offsets.size(), handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + offsets.begin(), + offsets.end(), + tmp_offsets.begin(), + cugraph::detail::shift_left_t{base_offset}); + auto tmp = cugraph::detail::expand_sparse_offsets( + raft::device_span(tmp_offsets.data(), tmp_offsets.size()), + base_idx, + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), tmp.begin(), tmp.end(), indices.begin()); +} + +template void expand_sparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span indices, + size_t base_offset, + int32_t base_idx); + +template void expand_sparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span indices, + size_t base_offset, + int64_t base_idx); + +template +void expand_hypersparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span nzd_indices, + raft::device_span indices, + offset_t base_offset) +{ + rmm::device_uvector tmp_offsets(offsets.size(), handle.get_stream()); + thrust::transform(handle.get_thrust_policy(), + offsets.begin(), + offsets.end(), + tmp_offsets.begin(), + cugraph::detail::shift_left_t{base_offset}); + auto tmp = cugraph::detail::expand_sparse_offsets( + raft::device_span(tmp_offsets.data(), tmp_offsets.size()), + idx_t{0}, + handle.get_stream()); + thrust::gather( + handle.get_thrust_policy(), tmp.begin(), tmp.end(), nzd_indices.begin(), indices.begin()); +} + +template void expand_hypersparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span nzd_indices, + raft::device_span indices, + size_t base_offset); + +template void expand_hypersparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span nzd_indices, + raft::device_span indices, + size_t base_offset); + } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp index cd8bc33308f..afdff33d80a 100644 --- a/cpp/tests/utilities/thrust_wrapper.hpp +++ b/cpp/tests/utilities/thrust_wrapper.hpp @@ -93,5 +93,19 @@ void 
populate_vertex_ids(raft::handle_t const& handle, rmm::device_uvector& d_vertices_v /* [INOUT] */, vertex_t vertex_id_offset); +template +void expand_sparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span indices, + offset_t base_offset, + idx_t base_idx); + +template +void expand_hypersparse_offsets(raft::handle_t const& handle, + raft::device_span offsets, + raft::device_span nzd_indices, + raft::device_span indices, + offset_t base_offset); + } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/validation_utilities.cu b/cpp/tests/utilities/validation_utilities.cu new file mode 100644 index 00000000000..3da998ad626 --- /dev/null +++ b/cpp/tests/utilities/validation_utilities.cu @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "detail/graph_partition_utils.cuh" +#include "utilities/validation_utilities.hpp" + +#include + +#include +#include +#include +#include + +namespace cugraph::test { + +template +size_t count_invalid_vertices( + raft::handle_t const& handle, + raft::device_span vertices, + cugraph::vertex_partition_view_t const& vertex_partition_view) +{ + return thrust::count_if( + handle.get_thrust_policy(), + vertices.begin(), + vertices.end(), + [vertex_partition = cugraph::vertex_partition_device_view_t{ + vertex_partition_view}] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); +} + +template +size_t count_duplicate_vertex_pairs_sorted(raft::handle_t const& handle, + raft::device_span src, + raft::device_span dst) +{ + return thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(src.size()), + [src, dst] __device__(size_t index) { + return (src[index - 1] == src[index]) && (dst[index - 1] == dst[index]); + }); +} + +// FIXME: Resolve this with dataframe_buffer variations in thrust_wrappers.cu +template +void sort(raft::handle_t const& handle, + raft::device_span srcs, + raft::device_span dsts) +{ + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end())); +} + +template +size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2) +{ + // FIXME: Add support for wgts, edgeids and edge_types... + // Added to the API for future support. 
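+ // Editor's note (illustrative): thrust::set_intersection requires both zipped (src, dst)
+ // ranges to already be sorted lexicographically (e.g. via cugraph::test::sort above).
+ // For example:
+ //   edge list 1: (0,1) (0,2) (1,3)
+ //   edge list 2: (0,2) (1,3) (2,4)   =>   intersection size == 2
+ // Writing to a discard_iterator stores nothing; the distance from the initial output
+ // iterator to the returned end iterator is exactly the number of common edges.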
+ + auto iter1 = thrust::make_zip_iterator(srcs1.begin(), dsts1.begin()); + auto iter2 = thrust::make_zip_iterator(srcs2.begin(), dsts2.begin()); + auto output_iter = thrust::make_discard_iterator(); + + return thrust::distance(output_iter, + thrust::set_intersection(handle.get_thrust_policy(), + iter1, + iter1 + srcs1.size(), + iter2, + iter2 + srcs2.size(), + output_iter)); +#if 0 + // OLD Approach + return thrust::count_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(src_out.begin(), dst_out.begin()), + thrust::make_zip_iterator(src_out.end(), dst_out.end()), + cuda::proclaim_return_type( + [src = raft::device_span{graph_src.data(), graph_src.size()}, + dst = raft::device_span{graph_dst.data(), + graph_dst.size()}] __device__(auto tuple) { +#if 0 + // FIXME: This fails on rocky linux CUDA 11.8, works on CUDA 12 + return thrust::binary_search(thrust::seq, + thrust::make_zip_iterator(src.begin(), dst.begin()), + thrust::make_zip_iterator(src.end(), dst.end()), + tuple) ? size_t{1} : size_t{0}; +#else + auto lb = thrust::distance( + src.begin(), + thrust::lower_bound(thrust::seq, src.begin(), src.end(), thrust::get<0>(tuple))); + auto ub = thrust::distance( + src.begin(), + thrust::upper_bound(thrust::seq, src.begin(), src.end(), thrust::get<0>(tuple))); + + if (src.data()[lb] == thrust::get<0>(tuple)) { + return thrust::binary_search( + thrust::seq, dst.begin() + lb, dst.begin() + ub, thrust::get<1>(tuple)) + ? size_t{1} + : size_t{0}; + } else { + return size_t{0}; + } +#endif + })); +#endif +} + +template +size_t count_edges_on_wrong_int_gpu(raft::handle_t const& handle, + raft::device_span srcs, + raft::device_span dsts, + raft::device_span vertex_partition_range_lasts) +{ + return thrust::count_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(srcs.begin(), dsts.begin()), + thrust::make_zip_iterator(srcs.end(), dsts.end()), + [comm_rank = handle.get_comms().get_rank(), + gpu_id_key_func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + vertex_partition_range_lasts, + handle.get_comms().get_size(), + handle.get_subcomm(cugraph::partition_manager::major_comm_name()).get_size(), + handle.get_subcomm(cugraph::partition_manager::minor_comm_name()) + .get_size()}] __device__(auto e) { + return (gpu_id_key_func(thrust::get<0>(e), thrust::get<1>(e)) != comm_rank); + }); +} + +// TODO: Split SG from MG? 
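+// Editor's sketch (hypothetical usage, not part of this diff): a caller would typically
+// assert that each helper reports zero violations, e.g.
+//
+//   cugraph::test::sort(handle,
+//                       raft::device_span<vertex_t>(srcs.data(), srcs.size()),
+//                       raft::device_span<vertex_t>(dsts.data(), dsts.size()));
+//   ASSERT_EQ(0,
+//             cugraph::test::count_duplicate_vertex_pairs_sorted(
+//               handle,
+//               raft::device_span<vertex_t const>(srcs.data(), srcs.size()),
+//               raft::device_span<vertex_t const>(dsts.data(), dsts.size())));
+//
+// count_edges_on_wrong_int_gpu additionally uses handle.get_comms() and the partition
+// sub-communicators, so it is only meaningful in multi-GPU tests.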
+template size_t count_invalid_vertices( + raft::handle_t const& handle, + raft::device_span vertices, + cugraph::vertex_partition_view_t const& vertex_partition_view); + +template size_t count_invalid_vertices( + raft::handle_t const& handle, + raft::device_span vertices, + cugraph::vertex_partition_view_t const& vertex_partition_view); + +template size_t count_duplicate_vertex_pairs_sorted(raft::handle_t const& handle, + raft::device_span src, + raft::device_span dst); + +template size_t count_duplicate_vertex_pairs_sorted(raft::handle_t const& handle, + raft::device_span src, + raft::device_span dst); + +template void sort(raft::handle_t const& handle, + raft::device_span srcs, + raft::device_span dsts); +template void sort(raft::handle_t const& handle, + raft::device_span srcs, + raft::device_span dsts); + +template size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2); + +template size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2); + +template size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2); + +template size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2); + +template size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2); + +template size_t count_intersection(raft::handle_t const& handle, + raft::device_span srcs1, + raft::device_span dsts1, + std::optional> wgts1, + std::optional> edge_ids1, + std::optional> edge_types1, + raft::device_span srcs2, + raft::device_span dsts2, + std::optional> wgts2, + std::optional> edge_ids2, + std::optional> edge_types2); + +template size_t count_edges_on_wrong_int_gpu( + raft::handle_t const& handle, + raft::device_span srcs, + raft::device_span dsts, + raft::device_span vertex_partition_range_lasts); + +template size_t count_edges_on_wrong_int_gpu( + raft::handle_t const& handle, + raft::device_span srcs, + raft::device_span dsts, + raft::device_span vertex_partition_range_lasts); + +} // namespace cugraph::test diff --git a/cpp/tests/utilities/validation_utilities.hpp b/cpp/tests/utilities/validation_utilities.hpp new file mode 100644 index 00000000000..b94ceaf68be --- /dev/null +++ b/cpp/tests/utilities/validation_utilities.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cugraph/graph_view.hpp>
+
+#include <raft/core/device_span.hpp>
+#include <raft/core/handle.hpp>
+
+namespace cugraph::test {
+template <typename vertex_t, bool multi_gpu>
+size_t count_invalid_vertices(
+  raft::handle_t const& handle,
+  raft::device_span<vertex_t const> vertices,
+  cugraph::vertex_partition_view_t<vertex_t, multi_gpu> const& vertex_partition);
+
+template <typename vertex_t>
+size_t count_duplicate_vertex_pairs_sorted(raft::handle_t const& handle,
+                                           raft::device_span<vertex_t const> src,
+                                           raft::device_span<vertex_t const> dst);
+
+template <typename vertex_t>
+void sort(raft::handle_t const& handle,
+          raft::device_span<vertex_t> srcs,
+          raft::device_span<vertex_t> dsts);
+
+template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
+size_t count_intersection(raft::handle_t const& handle,
+                          raft::device_span<vertex_t const> srcs1,
+                          raft::device_span<vertex_t const> dsts1,
+                          std::optional<raft::device_span<weight_t const>> wgts1,
+                          std::optional<raft::device_span<edge_t const>> edge_ids1,
+                          std::optional<raft::device_span<edge_type_t const>> edge_types1,
+                          raft::device_span<vertex_t const> srcs2,
+                          raft::device_span<vertex_t const> dsts2,
+                          std::optional<raft::device_span<weight_t const>> wgts2,
+                          std::optional<raft::device_span<edge_t const>> edge_ids2,
+                          std::optional<raft::device_span<edge_type_t const>> edge_types2);
+
+template <typename vertex_t>
+size_t count_edges_on_wrong_int_gpu(raft::handle_t const& handle,
+                                    raft::device_span<vertex_t const> srcs,
+                                    raft::device_span<vertex_t const> dsts,
+                                    raft::device_span<vertex_t const> vertex_partition_range_lasts);
+
+} // namespace cugraph::test
diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh
index eea789ef3e3..6778166ab6e 100755
--- a/datasets/get_test_data.sh
+++ b/datasets/get_test_data.sh
@@ -27,6 +27,12 @@ cd "$( cd "$( dirname "$(realpath -m "${BASH_SOURCE[0]}")" )" && pwd )";
 #
 # FIXME: some test data needs to be extracted to "benchmarks", which is
 # confusing now that there's dedicated datasets for benchmarks.
+CPP_CI_DATASET_DATA=" +# ~10s download +https://data.rapids.ai/cugraph/test/cpp_ci_datasets.tgz +test +" + BASE_DATASET_DATA=" # ~22s download https://data.rapids.ai/cugraph/test/datasets.tgz @@ -89,6 +95,8 @@ if hasArg "--benchmark"; then DATASET_DATA="${BENCHMARK_DATASET_DATA}" elif hasArg "--subset"; then DATASET_DATA="${BASE_DATASET_DATA}" +elif hasArg "--cpp_ci_subset"; then + DATASET_DATA="${CPP_CI_DATASET_DATA}" elif hasArg "--self_loops"; then DATASET_DATA="${SELF_LOOPS_DATASET_DATA}" # Do not include benchmark datasets by default - too big diff --git a/dependencies.yaml b/dependencies.yaml index 6bb728a2aae..640adf8099f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -20,6 +20,7 @@ files: - depends_on_dask_cudf - depends_on_pylibraft - depends_on_raft_dask + # Deprecate pylibcugraphops - depends_on_pylibcugraphops - depends_on_pylibwholegraph - depends_on_cupy @@ -44,6 +45,7 @@ files: - cuda_version - docs - py_version + # Deprecate pylibcugraphops - depends_on_pylibcugraphops test_cpp: output: none @@ -135,6 +137,7 @@ files: extras: table: project includes: + - cuda_wheels - depends_on_rmm - depends_on_pylibraft py_test_pylibcugraph: @@ -188,6 +191,7 @@ files: table: project includes: - python_run_cugraph_dgl + # Deprecate pylibcugraphops - depends_on_pylibcugraphops py_test_cugraph_dgl: output: pyproject @@ -214,6 +218,7 @@ files: table: project includes: - python_run_cugraph_pyg + # Deprecate pylibcugraphops - depends_on_pylibcugraphops py_test_cugraph_pyg: output: pyproject @@ -239,6 +244,7 @@ files: extras: table: project includes: + # Deprecate pylibcugraphops - depends_on_pylibcugraphops py_test_cugraph_equivariant: output: pyproject @@ -298,6 +304,7 @@ files: conda_dir: python/cugraph-dgl/conda includes: - checks + # Deprecate pylibcugraphops - depends_on_pylibcugraphops - cugraph_dgl_dev - test_python_common @@ -308,6 +315,7 @@ files: conda_dir: python/cugraph-pyg/conda includes: - checks + # Deprecate pylibcugraphops - depends_on_pylibcugraphops - cugraph_pyg_dev - test_python_common @@ -376,6 +384,36 @@ dependencies: packages: - cudatoolkit - cuda-nvtx + cuda_wheels: + specific: + - output_types: pyproject + matrices: + - matrix: + cuda: "12.*" + use_cuda_wheels: "true" + packages: + - nvidia-cublas-cu12 + - nvidia-curand-cu12 + - nvidia-cusolver-cu12 + - nvidia-cusparse-cu12 + # CUDA 11 does not provide wheels, so use the system libraries instead + - matrix: + cuda: "11.*" + use_cuda_wheels: "true" + packages: + # if use_cuda_wheels=false is provided, do not add dependencies on any CUDA wheels + # (e.g. 
for DLFW and pip devcontainers) + - matrix: + use_cuda_wheels: "false" + packages: + # if no matching matrix selectors passed, list the unsuffixed packages + # (just as a source of documentation, as this populates pyproject.toml in source control) + - matrix: + packages: + - nvidia-cublas + - nvidia-curand + - nvidia-cusolver + - nvidia-cusparse common_build: common: - output_types: [conda, pyproject] @@ -388,11 +426,12 @@ dependencies: packages: - c-compiler - cxx-compiler - - libcudf==24.10.*,>=0.0.0a0 - - libcugraphops==24.10.*,>=0.0.0a0 - - libraft-headers==24.10.*,>=0.0.0a0 - - libraft==24.10.*,>=0.0.0a0 - - librmm==24.10.*,>=0.0.0a0 + - libcudf==24.12.*,>=0.0.0a0 + # Deprecate libcugraphops + - libcugraphops==24.12.*,>=0.0.0a0 + - libraft-headers==24.12.*,>=0.0.0a0 + - libraft==24.12.*,>=0.0.0a0 + - librmm==24.12.*,>=0.0.0a0 - openmpi # Required for building cpp-mgtests (multi-GPU tests) specific: - output_types: [conda] @@ -441,10 +480,6 @@ dependencies: specific: - output_types: [conda] matrices: - - matrix: - py: "3.9" - packages: - - python=3.9 - matrix: py: "3.10" packages: @@ -453,9 +488,13 @@ dependencies: py: "3.11" packages: - python=3.11 + - matrix: + py: "3.12" + packages: + - python=3.12 - matrix: packages: - - python>=3.9,<3.12 + - python>=3.10,<3.13 python_build_rapids: common: - output_types: [conda, pyproject, requirements] @@ -474,26 +513,26 @@ dependencies: - cython>=3.0.0 - output_types: conda packages: - - scikit-build-core>=0.7.0 + - scikit-build-core>=0.10.0 - output_types: [pyproject, requirements] packages: - - scikit-build-core[pyproject]>=0.7.0 + - scikit-build-core[pyproject]>=0.10.0 python_run_cugraph: common: - output_types: [conda, pyproject] packages: - - &dask rapids-dask-dependency==24.10.*,>=0.0.0a0 - - &dask_cuda dask-cuda==24.10.*,>=0.0.0a0 + - &dask rapids-dask-dependency==24.12.*,>=0.0.0a0 + - &dask_cuda dask-cuda==24.12.*,>=0.0.0a0 - &numba numba>=0.57 - - &numpy numpy>=1.23,<2.0a0 + - &numpy numpy>=1.23,<3.0a0 - output_types: conda packages: - aiohttp - fsspec>=0.6.0 - requests - - nccl>=2.9.9 + - nccl>=2.19 - ucx-proc=*=gpu - - &ucx_py_unsuffixed ucx-py==0.40.*,>=0.0.0a0 + - &ucx_py_unsuffixed ucx-py==0.41.*,>=0.0.0a0 - output_types: pyproject packages: # cudf uses fsspec but is protocol independent. 
cugraph @@ -506,12 +545,12 @@ dependencies: cuda: "11.*" cuda_suffixed: "true" packages: - - &ucx_py_cu11 ucx-py-cu11==0.40.*,>=0.0.0a0 + - &ucx_py_cu11 ucx-py-cu11==0.41.*,>=0.0.0a0 - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - - &ucx_py_cu12 ucx-py-cu12==0.40.*,>=0.0.0a0 + - &ucx_py_cu12 ucx-py-cu12==0.41.*,>=0.0.0a0 - matrix: packages: - *ucx_py_unsuffixed @@ -534,15 +573,15 @@ dependencies: cuda: "11.*" cuda_suffixed: "true" packages: - - &cugraph_cu11 cugraph-cu11==24.10.*,>=0.0.0a0 + - &cugraph_cu11 cugraph-cu11==24.12.*,>=0.0.0a0 - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - - &cugraph_cu12 cugraph-cu12==24.10.*,>=0.0.0a0 + - &cugraph_cu12 cugraph-cu12==24.12.*,>=0.0.0a0 - matrix: packages: - - &cugraph_unsuffixed cugraph==24.10.*,>=0.0.0a0 + - &cugraph_unsuffixed cugraph==24.12.*,>=0.0.0a0 python_run_cugraph_pyg: common: - output_types: [conda, pyproject] @@ -590,19 +629,19 @@ dependencies: cuda_suffixed: "true" packages: - *cugraph_cu11 - - cugraph-service-client-cu11==24.10.*,>=0.0.0a0 + - cugraph-service-client-cu11==24.12.*,>=0.0.0a0 - *ucx_py_cu11 - matrix: cuda: "12.*" cuda_suffixed: "true" packages: - *cugraph_cu12 - - cugraph-service-client-cu12==24.10.*,>=0.0.0a0 + - cugraph-service-client-cu12==24.12.*,>=0.0.0a0 - *ucx_py_cu12 - matrix: packages: - *cugraph_unsuffixed - - cugraph-service-client==24.10.*,>=0.0.0a0 + - cugraph-service-client==24.12.*,>=0.0.0a0 - *ucx_py_unsuffixed test_cpp: common: @@ -638,7 +677,7 @@ dependencies: - scikit-learn>=0.23.1 - output_types: [conda] packages: - - &pylibwholegraph_unsuffixed pylibwholegraph==24.10.*,>=0.0.0a0 + - &pylibwholegraph_unsuffixed pylibwholegraph==24.12.*,>=0.0.0a0 - *thrift test_python_pylibcugraph: common: @@ -649,7 +688,6 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - packaging>=21 # not needed by nx-cugraph tests, but is required for running networkx tests - pytest-mpl cugraph_dgl_dev: @@ -657,7 +695,9 @@ dependencies: - output_types: [conda] packages: - *cugraph_unsuffixed - - pytorch>=2.0 + # ceiling could be removed when this is fixed: + # https://github.com/conda-forge/pytorch-cpu-feedstock/issues/254 + - &pytorch_conda pytorch>=2.3,<2.4.0a0 - pytorch-cuda==11.8 - &tensordict tensordict>=0.1.2 - dgl>=1.1.0.cu* @@ -666,7 +706,7 @@ dependencies: - output_types: [conda] packages: - *cugraph_unsuffixed - - pytorch>=2.0 + - *pytorch_conda - pytorch-cuda==11.8 - *tensordict - pyg>=2.5,<2.6 @@ -675,9 +715,11 @@ dependencies: common: - output_types: [conda] packages: - - &pytorch_unsuffixed pytorch>=2.0,<2.2.0a0 + - *pytorch_conda - torchdata - pydantic + - ogb + - torchmetrics specific: - output_types: [requirements] @@ -693,7 +735,7 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - &pytorch_pip torch>=2.0,<2.2.0a0 + - &pytorch_pip torch>=2.3,<2.4.0a0 - *tensordict - matrix: {cuda: "11.*"} packages: @@ -718,19 +760,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibwholegraph-cu12==24.10.*,>=0.0.0a0 + - pylibwholegraph-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibwholegraph-cu11==24.10.*,>=0.0.0a0 + - pylibwholegraph-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*pylibwholegraph_unsuffixed]} depends_on_rmm: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -743,19 +785,19 @@ dependencies: cuda: 
"12.*" cuda_suffixed: "true" packages: - - rmm-cu12==24.10.*,>=0.0.0a0 + - rmm-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - rmm-cu11==24.10.*,>=0.0.0a0 + - rmm-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*rmm_unsuffixed]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -768,19 +810,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.10.*,>=0.0.0a0 + - cudf-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.10.*,>=0.0.0a0 + - cudf-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_dask_cudf: common: - output_types: conda packages: - - &dask_cudf_unsuffixed dask-cudf==24.10.*,>=0.0.0a0 + - &dask_cudf_unsuffixed dask-cudf==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -793,19 +835,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - dask-cudf-cu12==24.10.*,>=0.0.0a0 + - dask-cudf-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - dask-cudf-cu11==24.10.*,>=0.0.0a0 + - dask-cudf-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*dask_cudf_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==24.10.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -818,19 +860,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==24.10.*,>=0.0.0a0 + - pylibraft-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibraft-cu11==24.10.*,>=0.0.0a0 + - pylibraft-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*pylibraft_unsuffixed]} depends_on_raft_dask: common: - output_types: conda packages: - - &raft_dask_unsuffixed raft-dask==24.10.*,>=0.0.0a0 + - &raft_dask_unsuffixed raft-dask==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -843,19 +885,19 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - raft-dask-cu12==24.10.*,>=0.0.0a0 + - raft-dask-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - raft-dask-cu11==24.10.*,>=0.0.0a0 + - raft-dask-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*raft_dask_unsuffixed]} depends_on_pylibcugraph: common: - output_types: conda packages: - - &pylibcugraph_unsuffixed pylibcugraph==24.10.*,>=0.0.0a0 + - &pylibcugraph_unsuffixed pylibcugraph==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -868,19 +910,20 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibcugraph-cu12==24.10.*,>=0.0.0a0 + - pylibcugraph-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibcugraph-cu11==24.10.*,>=0.0.0a0 + - pylibcugraph-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcugraph_unsuffixed]} + # deprecate pylibcugraphops depends_on_pylibcugraphops: common: - output_types: conda packages: - - &pylibcugraphops_unsuffixed pylibcugraphops==24.10.*,>=0.0.0a0 + - 
&pylibcugraphops_unsuffixed pylibcugraphops==24.12.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -893,12 +936,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibcugraphops-cu12==24.10.*,>=0.0.0a0 + - pylibcugraphops-cu12==24.12.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - pylibcugraphops-cu11==24.10.*,>=0.0.0a0 + - pylibcugraphops-cu11==24.12.*,>=0.0.0a0 - {matrix: null, packages: [*pylibcugraphops_unsuffixed]} depends_on_cupy: diff --git a/docs/cugraph/source/_static/bc_benchmark.png b/docs/cugraph/source/_static/bc_benchmark.png new file mode 100644 index 00000000000..9e385c97e99 Binary files /dev/null and b/docs/cugraph/source/_static/bc_benchmark.png differ diff --git a/docs/cugraph/source/_static/colab.png b/docs/cugraph/source/_static/colab.png new file mode 100644 index 00000000000..c4c3f5b46e1 Binary files /dev/null and b/docs/cugraph/source/_static/colab.png differ diff --git a/docs/cugraph/source/_static/nxcg-execution-diagram.jpg b/docs/cugraph/source/_static/nxcg-execution-diagram.jpg new file mode 100644 index 00000000000..48136289af9 Binary files /dev/null and b/docs/cugraph/source/_static/nxcg-execution-diagram.jpg differ diff --git a/docs/cugraph/source/installation/getting_cugraph.md b/docs/cugraph/source/installation/getting_cugraph.md index 126325c09af..01bc9e379c9 100644 --- a/docs/cugraph/source/installation/getting_cugraph.md +++ b/docs/cugraph/source/installation/getting_cugraph.md @@ -21,7 +21,7 @@ The RAPIDS Docker containers contain all RAPIDS packages, including all from cuG ## Conda -It is easy to install cuGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download). +It is easy to install cuGraph using conda. You can get a minimal conda installation with [miniforge](https://github.com/conda-forge/miniforge). cuGraph Conda packages * cugraph - this will also import: @@ -45,7 +45,7 @@ conda install -c rapidsai -c conda-forge -c nvidia cugraph cuda-version=12.0 Alternatively, use `cuda-version=11.8` for packages supporting CUDA 11. -Note: This conda installation only applies to Linux and Python versions 3.9/3.10/3.11. +Note: This conda installation only applies to Linux and Python versions 3.10/3.11/3.12.
diff --git a/docs/cugraph/source/installation/source_build.md b/docs/cugraph/source/installation/source_build.md index 89e63badef8..243a62e5c81 100644 --- a/docs/cugraph/source/installation/source_build.md +++ b/docs/cugraph/source/installation/source_build.md @@ -12,8 +12,7 @@ __Compilers:__ * `nvcc` version 11.5+ __CUDA:__ -* CUDA 11.2+ -* NVIDIA driver 470.42.01 or newer +* CUDA 11.8+ * NVIDIA GPU, Volta architecture or later, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+ Further details and download links for these prerequisites are available on the @@ -178,7 +177,7 @@ Run either the C++ or the Python tests with datasets make test ``` -Note: This conda installation only applies to Linux and Python versions 3.8/3.11. +Note: This conda installation only applies to Linux and Python versions 3.10, 3.11, and 3.12. ### (OPTIONAL) Set environment variable on activation diff --git a/docs/cugraph/source/nx_cugraph/benchmarks.md b/docs/cugraph/source/nx_cugraph/benchmarks.md new file mode 100644 index 00000000000..31d5e5b09eb --- /dev/null +++ b/docs/cugraph/source/nx_cugraph/benchmarks.md @@ -0,0 +1,28 @@ +# Benchmarks + +## NetworkX vs. nx-cugraph +We ran several commonly used graph algorithms on both `networkx` and `nx-cugraph`. Here are the results + + +
+<figure>
+
+![bench-image](../_static/bc_benchmark.png)
+
+<figcaption>Results from running this Benchmark</figcaption>
+</figure>
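The comparison shown above can also be sanity-checked informally before running the full suite. A minimal sketch (illustrative only; the graph and timing harness here are stand-ins, not the pytest-based harness referenced below):

```python
import time

import networkx as nx

# Substitute a large graph (e.g. cit-Patents) for meaningful numbers;
# karate_club_graph is only a small placeholder here.
G = nx.karate_club_graph()

start = time.perf_counter()
nx.betweenness_centrality(G, k=10)
print(f"default backend: {time.perf_counter() - start:.3f}s")

start = time.perf_counter()
nx.betweenness_centrality(G, k=10, backend="cugraph")
print(f"cugraph backend: {time.perf_counter() - start:.3f}s")
```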
+
+## Reproducing Benchmarks
+
+Below are the steps to reproduce the results on your workstation. These are documented in this [README](https://github.com/rapidsai/cugraph/blob/HEAD/benchmarks/nx-cugraph/pytest-based).
+
+1. Clone the latest cuGraph repository
+
+2. Follow the instructions to build an environment
+
+3. Activate the environment
+
+4. Install the latest `nx-cugraph` by following the [guide](installation.md)
+
+5. Follow the instructions written in the README here: `cugraph/benchmarks/nx-cugraph/pytest-based/`
diff --git a/docs/cugraph/source/nx_cugraph/faqs.md b/docs/cugraph/source/nx_cugraph/faqs.md
new file mode 100644
index 00000000000..dee943d1908
--- /dev/null
+++ b/docs/cugraph/source/nx_cugraph/faqs.md
@@ -0,0 +1,5 @@
+# FAQ
+
+ > **1. Is `nx-cugraph` able to run across multiple GPUs?**
+
+nx-cugraph currently does not support multi-GPU. Multi-GPU support may be added to a future release of nx-cugraph, but consider [cugraph](https://docs.rapids.ai/api/cugraph/stable) for multi-GPU accelerated graph analytics in Python today.
diff --git a/docs/cugraph/source/nx_cugraph/how-it-works.md b/docs/cugraph/source/nx_cugraph/how-it-works.md
new file mode 100644
index 00000000000..f9dc5af67ac
--- /dev/null
+++ b/docs/cugraph/source/nx_cugraph/how-it-works.md
@@ -0,0 +1,114 @@
+# How it Works
+
+NetworkX has the ability to **dispatch function calls to separately-installed third-party backends**.
+
+NetworkX backends let users experience improved performance and/or additional functionality without changing their NetworkX Python code. Examples include backends that provide algorithm acceleration using GPUs, parallel processing, graph database integration, and more.
+
+While NetworkX is a pure-Python implementation with minimal to no dependencies, backends may be written in other languages and require specialized hardware and/or OS support, additional software dependencies, or even separate services. Installation instructions vary based on the backend, and additional information can be found from the individual backend project pages listed in the NetworkX Backend Gallery.
+
+
+![nxcg-execution-flow](../_static/nxcg-execution-diagram.jpg)
+
+## Enabling nx-cugraph
+
+NetworkX will use nx-cugraph as the graph analytics backend if any of the
+following are used:
+
+### `NETWORKX_BACKEND_PRIORITY` environment variable
+
+The `NETWORKX_BACKEND_PRIORITY` environment variable can be used to have NetworkX automatically dispatch to specified backends. This variable can be set to a single backend name, or to a comma-separated list of backends ordered by the priority in which NetworkX should try them. If a NetworkX function is called that nx-cugraph supports, NetworkX will redirect the function call to nx-cugraph automatically, or fall back to the next backend in the list if provided, or run using the default NetworkX implementation. See [NetworkX Backends and Configs](https://networkx.org/documentation/stable/reference/backends.html).
+
+For example, this setting will have NetworkX use nx-cugraph for any function called by the script that nx-cugraph supports, and the default NetworkX implementation for all others.
+```
+bash> NETWORKX_BACKEND_PRIORITY=cugraph python my_networkx_script.py
+```
+
+This example will have NetworkX use nx-cugraph for functions it supports, then try other_backend if nx-cugraph does not support them, and finally the default NetworkX implementation if not supported by either backend:
+```
+bash> NETWORKX_BACKEND_PRIORITY="cugraph,other_backend" python my_networkx_script.py
+```
+
+### `backend=` keyword argument
+
+To explicitly specify a particular backend for an API, use the `backend=`
+keyword argument. This argument takes precedence over the
+`NETWORKX_BACKEND_PRIORITY` environment variable. This requires anyone
+running code that uses the `backend=` keyword argument to have the specified
+backend installed.
+
+Example:
+```python
+nx.betweenness_centrality(cit_patents_graph, k=k, backend="cugraph")
+```
+
+### Type-based dispatching
+
+NetworkX also supports automatically dispatching to backends associated with
+specific graph types. Like the `backend=` keyword argument example above, this
+requires the user to write code for a specific backend, and therefore requires
+the backend to be installed, but has the advantage of ensuring a particular
+behavior without the potential for runtime conversions.
+
+To use type-based dispatching with nx-cugraph, the user must import the backend
+directly in their code to access the utilities provided to create a Graph
+instance specifically for the nx-cugraph backend.
+
+Example:
+```python
+import networkx as nx
+import nx_cugraph as nxcg
+
+G = nx.Graph()
+...
+nxcg_G = nxcg.from_networkx(G)             # conversion happens once here
+nx.betweenness_centrality(nxcg_G, k=1000)  # nxcg Graph type causes cugraph backend
+                                           # to be used, no conversion necessary
+```
+
+## Command Line Example
+
+---
+
+Create `bc_demo.ipy` and paste the code below.
+
+```python
+import pandas as pd
+import networkx as nx
+
+url = "https://data.rapids.ai/cugraph/datasets/cit-Patents.csv"
+df = pd.read_csv(url, sep=" ", names=["src", "dst"], dtype="int32")
+G = nx.from_pandas_edgelist(df, source="src", target="dst")
+
+%time result = nx.betweenness_centrality(G, k=10)
+```
+Run the command:
+```
+user@machine:/# ipython bc_demo.ipy
+```
+
+You will observe a run time of approximately 7 minutes, more or less depending on your CPU.
+
+Run the command again, this time specifying cugraph as the NetworkX backend.
+```
+user@machine:/# NETWORKX_BACKEND_PRIORITY=cugraph ipython bc_demo.ipy
+```
+This run will be much faster, typically around 20 seconds depending on your GPU.
+
+There is also an option to cache the graph conversion to GPU. This can dramatically improve performance when running multiple algorithms on the same graph. Caching is enabled by default for NetworkX versions 3.4 and later, but if using an older version, set `NETWORKX_CACHE_CONVERTED_GRAPHS=True`:
+```
+NETWORKX_BACKEND_PRIORITY=cugraph NETWORKX_CACHE_CONVERTED_GRAPHS=True ipython bc_demo.ipy
+```
+
+When running Python interactively, the cugraph backend can be specified as an argument in the algorithm call.
+
+For example:
+```
+nx.betweenness_centrality(cit_patents_graph, k=k, backend="cugraph")
+```
+
+
+The latest list of algorithms supported by nx-cugraph can be found [here](https://github.com/rapidsai/cugraph/blob/HEAD/python/nx-cugraph/README.md#algorithms) or in the next section.
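In addition to the environment variable and the `backend=` keyword, NetworkX 3.3 and later also expose these controls programmatically through the `nx.config` object. A minimal sketch, assuming NetworkX 3.3+:

```python
import networkx as nx

# Equivalent to NETWORKX_BACKEND_PRIORITY=cugraph
nx.config.backend_priority = ["cugraph"]

# Equivalent to NETWORKX_CACHE_CONVERTED_GRAPHS=True (the default in NetworkX 3.4+)
nx.config.cache_converted_graphs = True
```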
+ +--- diff --git a/docs/cugraph/source/nx_cugraph/index.rst b/docs/cugraph/source/nx_cugraph/index.rst index ef6f51601ab..110300c1836 100644 --- a/docs/cugraph/source/nx_cugraph/index.rst +++ b/docs/cugraph/source/nx_cugraph/index.rst @@ -1,9 +1,48 @@ -=============================== -nxCugraph as a NetworkX Backend -=============================== +nx-cugraph +----------- +nx-cugraph is a `NetworkX backend `_ that provides **GPU acceleration** to many popular NetworkX algorithms. + +By simply `installing and enabling nx-cugraph `_, users can see significant speedup on workflows where performance is hindered by the default NetworkX implementation. With ``nx-cugraph``, users can have GPU-based, large-scale performance **without** changing their familiar and easy-to-use NetworkX code. + +.. code-block:: python + + import pandas as pd + import networkx as nx + + url = "https://data.rapids.ai/cugraph/datasets/cit-Patents.csv" + df = pd.read_csv(url, sep=" ", names=["src", "dst"], dtype="int32") + G = nx.from_pandas_edgelist(df, source="src", target="dst") + + %time result = nx.betweenness_centrality(G, k=10) + +.. figure:: ../_static/colab.png + :width: 200px + :target: https://nvda.ws/4drM4re + + Try it on Google Colab! + + ++------------------------------------------------------------------------------------------------------------------------+ +| **Zero Code Change Acceleration** | +| | +| Just ``nx.config.backend_priority=["cugraph"]`` in Jupyter, or set ``NETWORKX_BACKEND_PRIORITY=cugraph`` in the shell. | ++------------------------------------------------------------------------------------------------------------------------+ +| **Run the same code on CPU or GPU** | +| | +| Nothing changes, not even your `import` statements, when going from CPU to GPU. | ++------------------------------------------------------------------------------------------------------------------------+ + + +``nx-cugraph`` is now Generally Available (GA) as part of the ``RAPIDS`` package. See `RAPIDS +Quick Start `_ to get up-and-running with ``nx-cugraph``. .. toctree:: - :maxdepth: 2 + :maxdepth: 1 + :caption: Contents: - nx_cugraph.md + how-it-works + supported-algorithms + installation + benchmarks + faqs diff --git a/docs/cugraph/source/nx_cugraph/installation.md b/docs/cugraph/source/nx_cugraph/installation.md new file mode 100644 index 00000000000..8d221f16fec --- /dev/null +++ b/docs/cugraph/source/nx_cugraph/installation.md @@ -0,0 +1,50 @@ +# Getting Started + +This guide describes how to install ``nx-cugraph`` and use it in your workflows. + + +## System Requirements + +`nx-cugraph` requires the following: + + - **Volta architecture or later NVIDIA GPU, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+** + - **[CUDA](https://docs.nvidia.com/cuda/index.html) 11.2, 11.4, 11.5, 11.8, 12.0, 12.2, or 12.5** + - **Python >= 3.10** + - **[NetworkX](https://networkx.org/documentation/stable/install.html#) >= 3.0 (version 3.2 or higher recommended)** + +More details about system requirements can be found in the [RAPIDS System Requirements Documentation](https://docs.rapids.ai/install#system-req). + +## Installing nx-cugraph + +Read the [RAPIDS Quick Start Guide](https://docs.rapids.ai/install) to learn more about installing all RAPIDS libraries. + +`nx-cugraph` can be installed using conda or pip. It is included in the RAPIDS metapackage, or can be installed separately. 
+ +### Conda +**Nightly version** +```bash +conda install -c rapidsai-nightly -c conda-forge -c nvidia nx-cugraph +``` + +**Stable version** +```bash +conda install -c rapidsai -c conda-forge -c nvidia nx-cugraph +``` + +### pip +**Nightly version** +```bash +pip install nx-cugraph-cu11 --extra-index-url https://pypi.anaconda.org/rapidsai-wheels-nightly/simple +``` + +**Stable version** +```bash +pip install nx-cugraph-cu11 --extra-index-url https://pypi.nvidia.com +``` + +
+
+**Note:**
+ - The `pip install` examples above are for CUDA 11. To install for CUDA 12, replace `-cu11` with `-cu12`.
+
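After installing, a quick import check confirms the package is usable (a minimal sketch; the `__version__` attribute is assumed to be present, as it is for other RAPIDS packages):

```python
import nx_cugraph

# Should print the installed version string
print(nx_cugraph.__version__)
```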
diff --git a/docs/cugraph/source/nx_cugraph/nx_cugraph.md b/docs/cugraph/source/nx_cugraph/nx_cugraph.md index 75a30b0be5c..900362a6e2b 100644 --- a/docs/cugraph/source/nx_cugraph/nx_cugraph.md +++ b/docs/cugraph/source/nx_cugraph/nx_cugraph.md @@ -1,18 +1,10 @@ ### nx_cugraph -nx-cugraph is a [NetworkX -backend]() that provides GPU acceleration to many popular NetworkX algorithms. - -By simply [installing and enabling nx-cugraph](), users can see significant speedup on workflows where performance is hindered by the default NetworkX implementation. With nx-cugraph, users can have GPU-based, large-scale performance without changing their familiar and easy-to-use NetworkX code. - -Let's look at some examples of algorithm speedups comparing NetworkX with and without GPU acceleration using nx-cugraph. - -Each chart has three measurements. -* NX - default NetworkX, no GPU acceleration -* nx-cugraph - GPU-accelerated NetworkX using nx-cugraph. This involves an internal conversion/transfer of graph data from CPU to GPU memory -* nx-cugraph (preconvert) - GPU-accelerated NetworkX using nx-cugraph with the graph data pre-converted/transferred to GPU +`nx-cugraph` is a [NetworkX backend]() that accelerates many popular NetworkX functions using cuGraph and NVIDIA GPUs. +Users simply [install and enable nx-cugraph](installation.md) to experience GPU speedups. +Let's look at some examples of algorithm speedups comparing CPU-based NetworkX to dispatched versions run on GPU with nx-cugraph. ![Ancestors](../images/ancestors.png) ![BFS Tree](../images/bfs_tree.png) @@ -22,46 +14,3 @@ Each chart has three measurements. ![Pagerank](../images/pagerank.png) ![Single Source Shortest Path](../images/sssp.png) ![Weakly Connected Components](../images/wcc.png) - -### Command line example -Open bc_demo.ipy and paste the code below. - -``` -import pandas as pd -import networkx as nx - -url = "https://data.rapids.ai/cugraph/datasets/cit-Patents.csv" -df = pd.read_csv(url, sep=" ", names=["src", "dst"], dtype="int32") -G = nx.from_pandas_edgelist(df, source="src", target="dst") - -%time result = nx.betweenness_centrality(G, k=10) -``` -Run the command: -``` -user@machine:/# ipython bc_demo.ipy -``` - -You will observe a run time of approximately 7 minutes...more or less depending on your cpu. - -Run the command again, this time specifying cugraph as the NetworkX backend. -``` -user@machine:/# NETWORKX_BACKEND_PRIORITY=cugraph ipython bc_demo.ipy -``` -This run will be much faster, typically around 20 seconds depending on your GPU. -``` -user@machine:/# NETWORKX_BACKEND_PRIORITY=cugraph ipython bc_demo.ipy -``` -There is also an option to cache the graph conversion to GPU. This can dramatically improve performance when running multiple algorithms on the same graph. -``` -NETWORKX_BACKEND_PRIORITY=cugraph NETWORKX_CACHE_CONVERTED_GRAPHS=True ipython bc_demo.ipy -``` - -When running Python interactively, the cugraph backend can be specified as an argument in the algorithm call. - -For example: -``` -nx.betweenness_centrality(cit_patents_graph, k=k, backend="cugraph") -``` - - -The latest list of algorithms supported by nx-cugraph can be found [here](https://github.com/rapidsai/cugraph/blob/main/python/nx-cugraph/README.md#algorithms).
diff --git a/docs/cugraph/source/nx_cugraph/supported-algorithms.rst new file mode 100644 index 00000000000..b21ef7bb668 --- /dev/null +++ b/docs/cugraph/source/nx_cugraph/supported-algorithms.rst @@ -0,0 +1,354 @@ +Supported Algorithms +===================== + +The nx-cugraph backend to NetworkX connects +`pylibcugraph <../../readme_pages/pylibcugraph.md>`_ (cuGraph's low-level Python +interface to its CUDA-based graph analytics library) and +`CuPy <https://cupy.dev/>`_ (a GPU-accelerated array library) to NetworkX's +familiar and easy-to-use API. + +Below is the list of algorithms that are currently supported in nx-cugraph. + + +Algorithms +---------- + ++-----------------------------+ +| **Centrality** | ++=============================+ +| betweenness_centrality | ++-----------------------------+ +| edge_betweenness_centrality | ++-----------------------------+ +| degree_centrality | ++-----------------------------+ +| in_degree_centrality | ++-----------------------------+ +| out_degree_centrality | ++-----------------------------+ +| eigenvector_centrality | ++-----------------------------+ +| katz_centrality | ++-----------------------------+ + ++---------------------+ +| **Cluster** | ++=====================+ +| average_clustering | ++---------------------+ +| clustering | ++---------------------+ +| transitivity | ++---------------------+ +| triangles | ++---------------------+ + ++--------------------------+ +| **Community** | ++==========================+ +| louvain_communities | ++--------------------------+ + ++--------------------------+ +| **Bipartite** | ++==========================+ +| complete_bipartite_graph | ++--------------------------+ + ++------------------------------------+ +| **Components** | ++====================================+ +| connected_components | ++------------------------------------+ +| is_connected | ++------------------------------------+ +| node_connected_component | ++------------------------------------+ +| number_connected_components | ++------------------------------------+ +| weakly_connected | ++------------------------------------+ +| is_weakly_connected | ++------------------------------------+ +| number_weakly_connected_components | ++------------------------------------+ +| weakly_connected_components | ++------------------------------------+ + ++-------------+ +| **Core** | ++=============+ +| core_number | ++-------------+ +| k_truss | ++-------------+ + ++-------------+ +| **DAG** | ++=============+ +| ancestors | ++-------------+ +| descendants | ++-------------+ + ++--------------------+ +| **Isolate** | ++====================+ +| is_isolate | ++--------------------+ +| isolates | ++--------------------+ +| number_of_isolates | ++--------------------+ + ++-------------------+ +| **Link analysis** | ++===================+ +| hits | ++-------------------+ +| pagerank | ++-------------------+ + ++----------------+ +| **Operators** | ++================+ +| complement | ++----------------+ +| reverse | ++----------------+ + ++----------------------+ +| **Reciprocity** | ++======================+ +| overall_reciprocity | ++----------------------+ +| reciprocity | ++----------------------+ + ++---------------------------------------+ +| **Shortest Paths** | ++=======================================+ +| has_path | ++---------------------------------------+ +| shortest_path | ++---------------------------------------+ +| shortest_path_length | ++---------------------------------------+ +| 
all_pairs_shortest_path | +---------------------------------------+ +| all_pairs_shortest_path_length | +---------------------------------------+ +| bidirectional_shortest_path | +---------------------------------------+ +| single_source_shortest_path | +---------------------------------------+ +| single_source_shortest_path_length | +---------------------------------------+ +| single_target_shortest_path | +---------------------------------------+ +| single_target_shortest_path_length | +---------------------------------------+ +| all_pairs_bellman_ford_path | +---------------------------------------+ +| all_pairs_bellman_ford_path_length | +---------------------------------------+ +| all_pairs_dijkstra | +---------------------------------------+ +| all_pairs_dijkstra_path | +---------------------------------------+ +| all_pairs_dijkstra_path_length | +---------------------------------------+ +| bellman_ford_path | +---------------------------------------+ +| bellman_ford_path_length | +---------------------------------------+ +| dijkstra_path | +---------------------------------------+ +| dijkstra_path_length | +---------------------------------------+ +| single_source_bellman_ford | +---------------------------------------+ +| single_source_bellman_ford_path | +---------------------------------------+ +| single_source_bellman_ford_path_length| +---------------------------------------+ +| single_source_dijkstra | +---------------------------------------+ +| single_source_dijkstra_path | +---------------------------------------+ +| single_source_dijkstra_path_length | +---------------------------------------+ + ++---------------------------+ +| **Traversal** | +===========================+ +| bfs_edges | +---------------------------+ +| bfs_layers | +---------------------------+ +| bfs_predecessors | +---------------------------+ +| bfs_successors | +---------------------------+ +| bfs_tree | +---------------------------+ +| descendants_at_distance | +---------------------------+ +| generic_bfs_edges | +---------------------------+ + ++---------------------+ +| **Tree** | +=====================+ +| is_arborescence | +---------------------+ +| is_branching | +---------------------+ +| is_forest | +---------------------+ +| is_tree | +---------------------+ + +Generators +------------ + ++-------------------------------+ +| **Classic** | +===============================+ +| barbell_graph | +-------------------------------+ +| circular_ladder_graph | +-------------------------------+ +| complete_graph | +-------------------------------+ +| complete_multipartite_graph | +-------------------------------+ +| cycle_graph | +-------------------------------+ +| empty_graph | +-------------------------------+ +| ladder_graph | +-------------------------------+ +| lollipop_graph | +-------------------------------+ +| null_graph | +-------------------------------+ +| path_graph | +-------------------------------+ +| star_graph | +-------------------------------+ +| tadpole_graph | +-------------------------------+ +| trivial_graph | +-------------------------------+ +| turan_graph | +-------------------------------+ +| wheel_graph | +-------------------------------+ + ++-----------------+ +| **Community** | ++=================+ +| caveman_graph | ++-----------------+ + ++------------+ +| **Ego** | ++============+ +| ego_graph | ++------------+ + ++------------------------------+ +| **small** | ++==============================+ +| bull_graph | 
++------------------------------+ +| chvatal_graph | ++------------------------------+ +| cubical_graph | ++------------------------------+ +| desargues_graph | ++------------------------------+ +| diamond_graph | ++------------------------------+ +| dodecahedral_graph | ++------------------------------+ +| frucht_graph | ++------------------------------+ +| heawood_graph | ++------------------------------+ +| house_graph | ++------------------------------+ +| house_x_graph | ++------------------------------+ +| icosahedral_graph | ++------------------------------+ +| krackhardt_kite_graph | ++------------------------------+ +| moebius_kantor_graph | ++------------------------------+ +| octahedral_graph | ++------------------------------+ +| pappus_graph | ++------------------------------+ +| petersen_graph | ++------------------------------+ +| sedgewick_maze_graph | ++------------------------------+ +| tetrahedral_graph | ++------------------------------+ +| truncated_cube_graph | ++------------------------------+ +| truncated_tetrahedron_graph | ++------------------------------+ +| tutte_graph | ++------------------------------+ + ++-------------------------------+ +| **Social** | ++===============================+ +| davis_southern_women_graph | ++-------------------------------+ +| florentine_families_graph | ++-------------------------------+ +| karate_club_graph | ++-------------------------------+ +| les_miserables_graph | ++-------------------------------+ + +Other +------- + ++-------------------------+ +| **Classes** | ++=========================+ +| is_negatively_weighted | ++-------------------------+ + ++----------------------+ +| **Convert** | ++======================+ +| from_dict_of_lists | ++----------------------+ +| to_dict_of_lists | ++----------------------+ + ++--------------------------+ +| **Convert Matrix** | ++==========================+ +| from_pandas_edgelist | ++--------------------------+ +| from_scipy_sparse_array | ++--------------------------+ + ++-----------------------------------+ +| **Relabel** | ++===================================+ +| convert_node_labels_to_integers | ++-----------------------------------+ +| relabel_nodes | ++-----------------------------------+ + + +To request nx-cugraph backend support for a NetworkX API that is not listed above, visit the `cuGraph GitHub repo <https://github.com/rapidsai/cugraph>`_.
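One way to check at runtime whether a particular function dispatches to nx-cugraph is to force the backend with the ``backend=`` keyword, which fails when the function is unsupported (a minimal sketch; the exact exception raised may vary across NetworkX versions):

.. code-block:: python

    import networkx as nx

    G = nx.karate_club_graph()

    try:
        nx.pagerank(G, backend="cugraph")
        print("pagerank dispatched to nx-cugraph")
    except NotImplementedError:
        print("pagerank is not supported by nx-cugraph")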
diff --git a/docs/cugraph/source/tutorials/basic_cugraph.md b/docs/cugraph/source/tutorials/basic_cugraph.md index 78325472489..a0c9ad576b2 100644 --- a/docs/cugraph/source/tutorials/basic_cugraph.md +++ b/docs/cugraph/source/tutorials/basic_cugraph.md @@ -4,8 +4,8 @@ CuGraph is part of [Rapids](https://docs.rapids.ai/user-guide) and has the following system requirements: * NVIDIA GPU, Volta architecture or later, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+ - * CUDA 11.2, 11.4, 11.5, 11.8, 12.0 or 12.2 - * Python version 3.9, 3.10, or 3.11 + * CUDA 11.2, 11.4, 11.5, 11.8, 12.0, 12.2, or 12.5 + * Python version 3.10, 3.11, or 3.12 * NetworkX >= version 3.3 or newer in order to use [NetworkX Configs](https://networkx.org/documentation/stable/reference/backends.html#module-networkx.utils.configs) **This is required for use of nx-cuGraph, [see below](#cugraph-using-networkx-code).** ## Installation diff --git a/docs/cugraph/source/tutorials/cugraph_notebooks.md b/docs/cugraph/source/tutorials/cugraph_notebooks.md index 559ba36e97e..6d7840dc3c4 100644 --- a/docs/cugraph/source/tutorials/cugraph_notebooks.md +++ b/docs/cugraph/source/tutorials/cugraph_notebooks.md @@ -55,10 +55,9 @@ Running the example in these notebooks requires: * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html)) * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS -* Python 3.8+ -* A system with an NVIDIA GPU: Pascal architecture or better +* Python 3.10+ +* A system with an NVIDIA GPU: Volta architecture or newer * CUDA 11.4+ -* NVIDIA driver 450.51+ ## Copyright diff --git a/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md b/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md index 57314dcd426..80c666d6593 100644 --- a/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md +++ b/docs/cugraph/source/wholegraph/installation/getting_wholegraph.md @@ -21,7 +21,7 @@ The RAPIDS Docker containers (as of Release 23.10) contain all RAPIDS packages, ## Conda -It is easy to install WholeGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download). +It is easy to install WholeGraph using conda. You can get a minimal conda installation with [miniforge](https://github.com/conda-forge/miniforge). WholeGraph conda packages * libwholegraph diff --git a/docs/cugraph/source/wholegraph/installation/source_build.md b/docs/cugraph/source/wholegraph/installation/source_build.md index a7727ac4052..7213cbfb096 100644 --- a/docs/cugraph/source/wholegraph/installation/source_build.md +++ b/docs/cugraph/source/wholegraph/installation/source_build.md @@ -16,8 +16,7 @@ __Compiler__: __CUDA__: * CUDA 11.8+ -* NVIDIA driver 450.80.02+ -* Pascal architecture or better +* Volta architecture or better You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). @@ -177,7 +176,7 @@ Run either the C++ or the Python tests with datasets ``` -Note: This conda installation only applies to Linux and Python versions 3.8/3.10. +Note: This conda installation only applies to Linux and Python versions 3.10, 3.11, and 3.12.
## Creating documentation diff --git a/notebooks/README.md b/notebooks/README.md index 818382f35a7..f0d0a25b9dd 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -56,10 +56,9 @@ Running the example in these notebooks requires: * Download via Docker, Conda (See [__Getting Started__](https://rapids.ai/start.html)) * cuGraph is dependent on the latest version of cuDF. Please install all components of RAPIDS -* Python 3.8+ -* A system with an NVIDIA GPU: Pascal architecture or better +* Python 3.10+ +* A system with an NVIDIA GPU: Volta architecture or newer * CUDA 11.4+ -* NVIDIA driver 450.51+ ### QuickStart @@ -67,13 +66,13 @@ The easiest way to run the notebooks is to get the latest [rapidsai/notebooks](h For example, get the latest (as of writing the document) nightly image (`a` after the version number indicates that an image is nightly) with cuda 12.0 using ```sh -docker pull rapidsai/notebooks:24.10a-cuda12.0-py3.9 +docker pull rapidsai/notebooks:24.12a-cuda12.0-py3.10 ``` And, then run a container based on the image using ```sh -docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:24.10a-cuda12.0-py3.9 +docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:24.12a-cuda12.0-py3.10 ``` You are all set. Run and edit cugraph notebooks from a browser at url http://127.0.0.1:8888/lab/tree/cugraph/cugraph_benchmarks @@ -89,8 +88,8 @@ ssh -L 127.0.0.1:8888:127.0.0.1:8888 [USER_NAME@][REMOTE_HOST_NAME or REMOTE_HO and then run the container in your remote machine. ```sh -docker pull rapidsai/notebooks:24.10a-cuda12.0-py3.9 -docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:24.10a-cuda12.0-py3.9 +docker pull rapidsai/notebooks:24.12a-cuda12.0-py3.10 +docker run --rm -it --pull always --gpus all --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -p 8888:8888 rapidsai/notebooks:24.12a-cuda12.0-py3.10 ``` You can run and edit cugraph notebooks at url http://127.0.0.1:8888/lab/tree/cugraph/cugraph_benchmarks as if they are running locally. diff --git a/notebooks/demo/accelerating_networkx.ipynb b/notebooks/demo/accelerating_networkx.ipynb new file mode 100644 index 00000000000..1a6c6cfb3f6 --- /dev/null +++ b/notebooks/demo/accelerating_networkx.ipynb @@ -0,0 +1,614 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "R2cpVp2WdOsp" + }, + "source": [ + "# NetworkX - Easy Graph Analytics\n", + "\n", + "NetworkX is the most popular library for graph analytics available in Python, or quite possibly any language. To illustrate this, NetworkX was downloaded more than 71 million times in September of 2024 alone, which is roughly 71 times more than the next most popular graph analytics library! [*](https://en.wikipedia.org/wiki/NetworkX) NetworkX has earned this popularity from its very easy-to-use API, the wealth of documentation and examples available, the large (and friendly) community behind it, and its easy installation which requires nothing more than Python.\n", + "\n", + "However, NetworkX users are familiar with the tradeoff that comes with those benefits. The pure-Python implementation often results in poor performance when graph data starts to reach larger scales, limiting the usefulness of the library for many real-world problems.\n", + "\n", + "# Accelerated NetworkX - Easy (and fast!) 
Graph Analytics\n", + "\n", + "To address the performance problem, NetworkX 3.0 introduced a mechanism to dispatch algorithm calls to alternate implementations. The NetworkX Python API remains the same but NetworkX will use more capable algorithm implementations provided by one or more backends. This approach means users don't have to give up NetworkX -or even change their code- in order to take advantage of GPU performance." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xkg10FrNThrK" + }, + "source": [ + "# Let's Get the Environment Setup\n", + "This notebook will demonstrate NetworkX both with and without GPU acceleration provided by the `nx-cugraph` backend.\n", + "\n", + "`nx-cugraph` is available as a package installable using `pip`, `conda`, and [from source](https://github.com/rapidsai/nx-cugraph). Before importing `networkx`, let's install `nx-cugraph` so it can be registered as an available backend by NetworkX when needed. We'll use `pip` to install.\n", + "\n", + "NOTES:\n", + "* `nx-cugraph` requires a compatible NVIDIA GPU, NVIDIA CUDA and associated drivers, and a supported OS. Details about these and other installation prerequisites can be seen [here](https://docs.rapids.ai/install#system-req).\n", + "* The `nx-cugraph` package is currently hosted by NVIDIA and therefore the `--extra-index-url` option must be used.\n", + "* `nx-cugraph` is supported on specific 11.x and 12.x CUDA versions, and the major version number must be known in order to install the correct build (this is determined automatically when using `conda`).\n", + "\n", + "To find the CUDA major version on your system, run the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NMFwzc1I95BS" + }, + "outputs": [], + "source": [ + "!nvcc --version" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i91Yj-yZ-nGS" + }, + "source": [ + "From the above output we can see we're using CUDA 12.x so we'll be installing `nx-cugraph-cu12`. If we were using CUDA 11.x, the package name would be `nx-cugraph-cu11`. We'll also be adding `https://pypi.nvidia.com` as an `--extra-index-url`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mYYN9EpnWphu" + }, + "outputs": [], + "source": [ + "!pip install nx-cugraph-cu12 --extra-index-url=https://pypi.nvidia.com" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0h1K-7tI_AZH" + }, + "source": [ + "Of course, we'll also be using `networkx`, which is already provided in the Colab environment. This notebook will be using features added in version 3.3, so we'll import it here to verify we have a compatible version." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YTV0ZTME2tV6" + }, + "outputs": [], + "source": [ + "import networkx as nx\n", + "nx.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UiZKOa3WC7be" + }, + "source": [ + "# Let's Start with Something Simple\n", + "\n", + "To begin, we'll compare NetworkX results without a backend to results of the same algorithm using the `nx-cugraph` backend on a small graph. `nx.karate_club_graph()` returns an instance of the famous example graph consisting of 34 nodes and 78 edges from Zachary's paper, described [here](https://en.wikipedia.org/wiki/Zachary%27s_karate_club)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3atL3tI0frYm" + }, + "source": [ + "## Betweenness Centrality\n", + "[Betweenness Centrality](https://en.wikipedia.org/wiki/Betweenness_centrality) is a graph algorithm that computes a centrality score for each node (`v`) based on how many of the shortest paths between pairs of nodes in the graph pass through `v`. A higher centrality score represents a node that \"connects\" other nodes in a network more than a node with a lower score does.\n", + "\n", + "First, let's create a NetworkX Graph instance of the Karate Club graph and inspect it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JSw7EZ46-kRu" + }, + "outputs": [], + "source": [ + "G = nx.karate_club_graph()\n", + "G.number_of_nodes(), G.number_of_edges()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_-E17u2gKgbC" + }, + "source": [ + "Next, let's run betweenness centrality and save the results. Because the Karate Club graph is so small, this should not take long." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qjxXXKJhKQ4s" + }, + "outputs": [], + "source": [ + "%%time\n", + "nx_bc_results = nx.betweenness_centrality(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ClrR3z9XMfLr" + }, + "source": [ + "Now, let's run the same algorithm on the same data using the `nx-cugraph` backend.\n", + "\n", + "There are several ways to instruct NetworkX to use a particular backend instead of the default implementation. Here, we will use the `config` API, which was added in NetworkX version 3.3.\n", + "\n", + "The following two lines set the backend to \"cugraph\" and enable graph conversion caching.\n", + "\n", + "Some notes:\n", + "* The standard convention for NetworkX backends is to name the package with a `nx-` prefix to denote that these are packages intended to be used with NetworkX, but the `nx-` prefix is not included when referring to them in NetworkX API calls. Here, `nx-cugraph` is the name of the backend package, and `\"cugraph\"` is the name NetworkX will use to refer to it.\n", + "* NetworkX can use multiple backends! `nx.config.backend_priority` is a list that can contain several backends, ordered based on priority. If a backend in the list cannot run a particular algorithm (either because it isn't supported in the backend, the algorithm doesn't support a particular option, or some other reason), NetworkX will try the next backend in the list. If no specified backend is able to run the algorithm, NetworkX will fall back to the default implementation.\n", + "* Many backends have their own data structures for representing an input graph, often optimized for that backend's implementation. Prior to running a backend algorithm, NetworkX will have the backend convert the standard NetworkX Graph instance to the backend-specific type. This conversion can be expensive, and rather than repeat it as part of each algorithm call, NetworkX can cache the conversion so it can be skipped on future calls if the graph doesn't change. This caching can save significant time and improve overall performance."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oFHwNqqsNsqS" + }, + "outputs": [], + "source": [ + "nx.config.backend_priority=[\"cugraph\"] # NETWORKX_BACKEND_PRIORITY=cugraph\n", + "nx.config.cache_converted_graphs=True # NETWORKX_CACHE_CONVERTED_GRAPHS=True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HrUeWRRQRzFP" + }, + "outputs": [], + "source": [ + "%%time\n", + "nxcg_bc_results = nx.betweenness_centrality(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1hxut3GTj5A" + }, + "source": [ + "You may have noticed that using the `nx-cugraph` backend resulted in a slightly slower execution time. This is not surprising when working with a graph this small, since the overhead of converting the graph for the first time and launching the algorithm kernel on the GPU is actually significantly more than the computation time itself. We'll see later that this overhead is negligible when compared to the time saved when running on a GPU for larger graphs.\n", + "\n", + "Since we've enabled graph conversion caching, we can see that if we re-run the same call the execution time is noticeably shorter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7a0XvpUOr9Ju" + }, + "outputs": [], + "source": [ + "%%time\n", + "nxcg_bc_results = nx.betweenness_centrality(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ppjE5J5RscOe" + }, + "source": [ + "Notice the warning above about using the cache. This will only be raised **once** per graph instance (it can also be easily disabled), but its purpose is to point out that the cache should not be used if the Graph object will have its attribute dictionary modified directly. In this case and many others, we won't be modifying the dictionaries directly. Instead, we will use APIs such as `nx.set_node_attributes` which properly clear the cache, so it's safe for us to use the cache. Because of that, we'll disable the warning so we don't see it on other graphs in this session." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Namb5JLvwS-q" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\", message=\"Using cached graph for 'cugraph' backend\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BzGAphcILFsT" + }, + "source": [ + "Smaller graphs are also easy to visualize with NetworkX's plotting utilities. The flexibility of NetworkX's `Graph` instances make it trivial to add the betweenness centrality scores back to the graph object as node attributes. This will allow us to use those values for the visualization.\n", + "\n", + "In this case, we'll create new attributes for each node called \"nx_bc\" for the default NetworkX results, and \"nxcg_bc\" for the nx-cugraph results. We'll use those values to assign the color for each node and plot two graphs side-by-side. This will make it easy to visually validate that the nodes with the higher centrality scores for both implementations match and do indeed appear to be more \"central\" to other nodes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1coV6ZfcUoqI" + }, + "outputs": [], + "source": [ + "nx.set_node_attributes(G, nx_bc_results, \"nx_bc\")\n", + "nx.set_node_attributes(G, nxcg_bc_results, \"nxcg_bc\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Sba2iYJgLoN2" + }, + "outputs": [], + "source": [ + "# Configure plot size and layout/position for each node\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams['figure.figsize'] = [12, 8]\n", + "pos = nx.spring_layout(G)\n", + "\n", + "# Assign colors for each set of betweenness centrality results\n", + "nx_colors = [G.nodes[n][\"nx_bc\"] for n in G.nodes()]\n", + "nxcg_colors = [G.nodes[n][\"nxcg_bc\"] for n in G.nodes()]\n", + "\n", + "# Plot the graph and color each node corresponding to NetworkX betweenness centrality values\n", + "plt.subplot(1, 2, 1)\n", + "nx.draw(G, pos=pos, with_labels=True, node_color=nx_colors)\n", + "\n", + "# Plot the graph and color each node corresponding to nx-cugraph betweenness centrality values\n", + "plt.subplot(1, 2, 2)\n", + "nx.draw(G, pos=pos, with_labels=True, node_color=nxcg_colors)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dJXH4Zn5VNSg" + }, + "source": [ + "As we can see, the same two nodes (`0` and `33`) are the two most central in both graphs, followed by `2`, `31`, and `32`.\n", + "\n", + "## PageRank\n", + "Another popular algorithm is [PageRank](https://en.wikipedia.org/wiki/PageRank). PageRank also assigns scores to each node, but these scores are based on analyzing links to each node to determine relative \"importance\" within the graph.\n", + "\n", + "Let's update the config to use the default NetworkX implementation and run `nx.pagerank`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9CdYNk62E1v_" + }, + "outputs": [], + "source": [ + "nx.config.backend_priority=[]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jo39YxVmYolq" + }, + "outputs": [], + "source": [ + "%%time\n", + "nx_pr_results = nx.pagerank(G)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sV6dM8ToZDiC" + }, + "source": [ + "We could set `nx.config.backend_priority` again to list `\"cugraph\"` as the backend, but let's instead show how the `backend` kwarg can be used to override the priority list and force a specific backend to be used." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oMSvQVGKY0rn" + }, + "outputs": [], + "source": [ + "%%time\n", + "nxcg_pr_results = nx.pagerank(G, backend=\"cugraph\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZGux_8xFZneI" + }, + "source": [ + "In this example, instead of plotting the graph to show that the results are identical, we can compare them directly using the saved values from both runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RcmtdFy4Zw7p" + }, + "outputs": [], + "source": [ + "sorted(nx_pr_results) == sorted(nxcg_pr_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mefjUEAnZ4pq" + }, + "source": [ + "# Working with Bigger Data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yLY-yl6PuNYo" + }, + "source": [ + "Now we'll look at a larger dataset from https://snap.stanford.edu/data/cit-Patents.html which contains citations across different U.S. patents granted from January 1, 1963 to December 30, 1999. 
The dataset represents 16.5M citations (edges) between 3.77M patents (nodes).\n", + "\n", + "This will demonstrate that data of this size starts to push the limits of the default pure-Python NetworkX implementation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lyYF0LbtFwjh" + }, + "outputs": [], + "source": [ + "# The locale encoding may have been modified from the plots above, reset here to run shell commands\n", + "import locale\n", + "locale.getpreferredencoding = lambda: \"UTF-8\"\n", + "!wget https://data.rapids.ai/cugraph/datasets/cit-Patents.csv # Skip if cit-Patents.csv already exists.\n", + "# !wget https://snap.stanford.edu/data/cit-Patents.txt.gz # Skip if cit-Patents.txt.gz already exists." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kjGINYphQSQ2" + }, + "outputs": [], + "source": [ + "%load_ext cudf.pandas\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iV4DieGZOalc" + }, + "outputs": [], + "source": [ + "%%time\n", + "df = pd.read_csv(\"cit-Patents.csv\",\n", + " sep=\" \",\n", + " names=[\"src\", \"dst\"],\n", + " dtype=\"int32\",\n", + ")\n", + "# df = pd.read_csv(\"cit-Patents.txt.gz\",\n", + "# compression=\"gzip\",\n", + "# skiprows=4,\n", + "# sep=\"\\t\",\n", + "# names=[\"src\", \"dst\"],\n", + "# dtype=\"int32\",\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PREA67u4eKat" + }, + "outputs": [], + "source": [ + "%%time\n", + "G = nx.from_pandas_edgelist(df, source=\"src\", target=\"dst\")\n", + "G.number_of_nodes(), G.number_of_edges()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NcsUxBqpu4zY" + }, + "source": [ + "By default, `nx.betweenness_centrality` will perform an all-pairs shortest path analysis when determining the centrality scores for each node. However, due to the much larger size of this graph, determining the shortest path for all pairs of nodes in the graph is not feasible. Instead, we'll use the parameter `k` to limit the number of shortest path computations used for determining the centrality scores, at the expense of accuracy. As we'll see when using a dataset this size with `nx.betweenness_centrality`, we have to limit `k` to `1` which is not practical but is sufficient here for demonstration purposes (since anything larger than `1` will result in many minutes of execution time)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gNDWbj3kAk3j" + }, + "outputs": [], + "source": [ + "%%time\n", + "bc_results = nx.betweenness_centrality(G, k=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NB8xmxMd1PlX" + }, + "source": [ + "Now we'll configure NetworkX to use the `nx-cugraph` backend (again, using the name convention that drops the package name's `nx-` prefix) and run the same call. Because this is a Graph that `nx-cugraph` hasn't seen before, the runtime will include the time to convert and cache a GPU-based graph." 
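+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Optional) After running the next few cells, a small sketch like the following -- using the `backend=` keyword argument shown earlier -- can show how runtime grows with the sample size `k`. Exact timings depend on your GPU, and running it before the cells below would convert and cache the graph early." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optional: run this AFTER the cells below so the timed first call there\n", + "# still shows the one-time conversion cost. Timings are illustrative.\n", + "import time\n", + "for k in (1, 10, 100):\n", + "    t0 = time.perf_counter()\n", + "    nx.betweenness_centrality(G, k=k, backend=\"cugraph\")\n", + "    print(f\"k={k:>3}: {time.perf_counter() - t0:.2f} s\")"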
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xUYNG1xhvbWc" + }, + "outputs": [], + "source": [ + "nx.config.backend_priority = [\"cugraph\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cmK8ZuQGvfPo" + }, + "outputs": [], + "source": [ + "%%time\n", + "bc_results = nx.betweenness_centrality(G, k=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vdHb1YXP15TZ" + }, + "source": [ + "Let's run betweenness centrality again, now with a more useful number of samples by setting `k=100`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fKjIrzL-vrGS" + }, + "outputs": [], + "source": [ + "%%time\n", + "bc_results = nx.betweenness_centrality(G, k=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QeMcrAX2HZSM" + }, + "source": [ + "Let's also run pagerank on the same dataset to compare." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gR8ID6ekHgHt" + }, + "outputs": [], + "source": [ + "nx.config.backend_priority = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rTFuvX5wb_c1" + }, + "outputs": [], + "source": [ + "%%time\n", + "nx_pr_results = nx.pagerank(G)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8sJx9aeJV9hv" + }, + "outputs": [], + "source": [ + "%%time\n", + "nxcg_pr_results = nx.pagerank(G, backend=\"cugraph\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wGOVQ6ZyY4Ih" + }, + "outputs": [], + "source": [ + "sorted(nx_pr_results) == sorted(nxcg_pr_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k2DfAaZaDIBj" + }, + "source": [ + "---\n", + "\n", + "Information on the U.S. Patent Citation Network dataset used in this notebook is as follows:\n", + "
Authors: Jure Leskovec and Andrej Krevl\n", + "
Title: SNAP Datasets, Stanford Large Network Dataset Collection\n", + "
URL: http://snap.stanford.edu/data\n", + "
Date: June 2014\n", + "
\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/demo/mg_pagerank.ipynb b/notebooks/demo/mg_pagerank.ipynb index bb333048450..e3314f80b3c 100644 --- a/notebooks/demo/mg_pagerank.ipynb +++ b/notebooks/demo/mg_pagerank.ipynb @@ -219,250 +219,250 @@ "text": [ "2023-05-12 09:25:01,974 - distributed.sizeof - WARNING - Sizeof calculation failed. Defaulting to 0.95 MiB\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", " return sizeof(obj)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", " return meth(arg, *args, **kwargs)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", " + df._index.memory_usage()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", " if self.levels:\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", " self._compute_levels_and_codes()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", + " File 
\"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", " code, cats = cudf.Series._from_data({None: col}).factorize()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", " return cudf.core.algorithms.factorize(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", " labels = values._column._label_encoding(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", " order = order.take(left_gather_map, check_bounds=False).argsort()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", " return self.as_frame()._get_sorted_inds(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", " return libcudf.sort.order_by(to_sort, ascending, na_position)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", " File \"sort.pyx\", line 141, in cudf._lib.sort.order_by\n", - "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniconda3/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", + "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniforge/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", "2023-05-12 09:25:01,976 - distributed.sizeof - WARNING - Sizeof calculation failed. 
Defaulting to 0.95 MiB\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", " return sizeof(obj)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", " return meth(arg, *args, **kwargs)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", " + df._index.memory_usage()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", " if self.levels:\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", " self._compute_levels_and_codes()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", " code, cats = cudf.Series._from_data({None: col}).factorize()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", " return cudf.core.algorithms.factorize(\n", - " File 
\"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", " labels = values._column._label_encoding(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", " order = order.take(left_gather_map, check_bounds=False).argsort()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", " return self.as_frame()._get_sorted_inds(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", " return libcudf.sort.order_by(to_sort, ascending, na_position)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", " File \"sort.pyx\", line 141, in cudf._lib.sort.order_by\n", - "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniconda3/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", + "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniforge/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", "2023-05-12 09:25:03,767 - distributed.sizeof - WARNING - Sizeof calculation failed. 
Defaulting to 0.95 MiB\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", " return sizeof(obj)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", " return meth(arg, *args, **kwargs)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", " + df._index.memory_usage()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", " if self.levels:\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", " self._compute_levels_and_codes()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", " code, cats = cudf.Series._from_data({None: col}).factorize()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", " return cudf.core.algorithms.factorize(\n", - " File 
\"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", " labels = values._column._label_encoding(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", " order = order.take(left_gather_map, check_bounds=False).argsort()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", " return self.as_frame()._get_sorted_inds(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", " return libcudf.sort.order_by(to_sort, ascending, na_position)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", " File \"sort.pyx\", line 141, in cudf._lib.sort.order_by\n", - "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniconda3/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", + "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniforge/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", "2023-05-12 09:25:03,768 - distributed.sizeof - WARNING - Sizeof calculation failed. 
Defaulting to 0.95 MiB\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/sizeof.py\", line 17, in safe_sizeof\n", " return sizeof(obj)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/utils.py\", line 642, in __call__\n", " return meth(arg, *args, **kwargs)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask_cudf/backends.py\", line 430, in sizeof_cudf_dataframe\n", " + df._index.memory_usage()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 1594, in memory_usage\n", " if self.levels:\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 605, in levels\n", " self._compute_levels_and_codes()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/multiindex.py\", line 748, in _compute_levels_and_codes\n", " code, cats = cudf.Series._from_data({None: col}).factorize()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/single_column_frame.py\", line 311, in factorize\n", " return cudf.core.algorithms.factorize(\n", - " File 
\"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/algorithms.py\", line 138, in factorize\n", " labels = values._column._label_encoding(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1385, in _label_encoding\n", " order = order.take(left_gather_map, check_bounds=False).argsort()\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1101, in argsort\n", " return self.as_frame()._get_sorted_inds(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 1572, in _get_sorted_inds\n", " return libcudf.sort.order_by(to_sort, ascending, na_position)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", " File \"sort.pyx\", line 141, in cudf._lib.sort.order_by\n", - "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniconda3/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", + "MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /home/dacosta/miniforge/envs/cugraph_0411/include/rmm/mr/device/cuda_memory_resource.hpp\n", "2023-05-12 09:25:03,820 - distributed.worker - ERROR - Could not deserialize task ('len-chunk-319fe46af5510615b2fae86c6e732896-841a12bf4568ebb80eb2030cc4d9651d', 1)\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2923, in loads_function\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2923, in loads_function\n", " result = cache_loads[bytes_object]\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/collections.py\", line 24, in __getitem__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/collections.py\", line 24, in __getitem__\n", " value = super().__getitem__(key)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/collections/__init__.py\", line 1106, in __getitem__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/collections/__init__.py\", line 1106, in __getitem__\n", " raise KeyError(key)\n", "KeyError: 
b'\\x80\\x05\\x95>\\x0b\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x11dask.optimization\\x94\\x8c\\x10SubgraphCallable\\x94\\x93\\x94(}\\x94(\\x8cKlen-chunk-319fe46af5510615b2fae86c6e732896-841a12bf4568ebb80eb2030cc4d9651d\\x94\\x8cZassign-getitem-len-chunk-319fe46af5510615b2fae86c6e732896-841a12bf4568ebb80eb2030cc4d9651d\\x94\\x8c*rename-01db283bd79fee66f232920c8dc6b55e_.0\\x94\\x8c;getitem-to_frame-rename-01db283bd79fee66f232920c8dc6b55e_.0\\x94\\x8c+getitem-3499fd71ac25ebbc1a06991edea6067c_.0\\x94\\x8c\\t_operator\\x94\\x8c\\x07getitem\\x94\\x93\\x94\\x8c/reset_index-f4c18304ca92859ccd09f44cf89b4b43_.0\\x94\\x8c\\x13__dask_blockwise__1\\x94\\x87\\x94h\\x0c(\\x8c\\ndask.utils\\x94\\x8c\\x05apply\\x94\\x93\\x94h\\x0f\\x8c\\x0cmethodcaller\\x94\\x93\\x94\\x8c\\x0breset_index\\x94\\x85\\x94R\\x94]\\x94\\x8c\\x13__dask_blockwise__5\\x94a\\x8c\\x08builtins\\x94\\x8c\\x04dict\\x94\\x93\\x94]\\x94]\\x94(\\x8c\\x04drop\\x94\\x89ea\\x86\\x94t\\x94h\\x07(h\\x11\\x8c\\x13dask.dataframe.core\\x94\\x8c\\x11apply_and_enforce\\x94\\x93\\x94]\\x94((h\\x11h#]\\x94h\\x0bh\\x0c\\x8c\\x13__dask_blockwise__0\\x94\\x87\\x94ah\\x1b]\\x94(]\\x94(\\x8c\\x05_func\\x94h\\x13\\x8c\\x08to_frame\\x94\\x85\\x94R\\x94e]\\x94(\\x8c\\x05_meta\\x94\\x8c\\x08builtins\\x94\\x8c\\x07getattr\\x94\\x93\\x94\\x8c\\x13cudf.core.dataframe\\x94\\x8c\\tDataFrame\\x94\\x93\\x94\\x8c\\x10host_deserialize\\x94\\x86\\x94R\\x94}\\x94(\\x8c\\x0ftype-serialized\\x94C0\\x80\\x04\\x95%\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x13cudf.core.dataframe\\x94\\x8c\\tDataFrame\\x94\\x93\\x94.\\x94\\x8c\\x0ccolumn_names\\x94C\\x14\\x80\\x04\\x95\\t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x03src\\x94\\x85\\x94.\\x94\\x8c\\x07columns\\x94}\\x94(\\x8c\\x0ftype-serialized\\x94C=\\x80\\x04\\x952\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x1acudf.core.column.numerical\\x94\\x8c\\x0fNumericalColumn\\x94\\x93\\x94.\\x94\\x8c\\x05dtype\\x94CB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i4\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94\\x8c\\x18dtype-is-cudf-serialized\\x94\\x89\\x8c\\x04data\\x94}\\x94(\\x8c\\x0ftype-serialized\\x94CI\\x80\\x04\\x95>\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c!cudf.core.buffer.spillable_buffer\\x94\\x8c\\x14SpillableBufferSlice\\x94\\x93\\x94.\\x94\\x8c\\x0bframe_count\\x94K\\x01u\\x8c\\x04mask\\x94}\\x94(hGCD\\x80\\x04\\x959\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c!cudf.core.buffer.spillable_buffer\\x94\\x8c\\x0fSpillableBuffer\\x94\\x93\\x94.\\x94hIK\\x01u\\x8c\\x04size\\x94K\\x00hIK\\x02u\\x85\\x94\\x8c\\x05index\\x94}\\x94(\\x8c\\x0cindex_column\\x94}\\x94(\\x8c\\x05start\\x94K\\x00\\x8c\\x04stop\\x94K\\x00\\x8c\\x04step\\x94K\\x01u\\x8c\\x04name\\x94C\\x04\\x80\\x04N.\\x94hBCB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i8\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94\\x8c\\x0ftype-serialized\\x94C-\\x80\\x04\\x95\"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x0fcudf.core.index\\x94\\x8c\\nRangeIndex\\x94\\x93\\x94.\\x94hIK\\x00u\\x8c\\x11index_frame_count\\x94K\\x00\\x8c\\x07is-cuda\\x94]\\x94(\\x88\\x88e\\x8c\\x07lengths\\x94]\\x94(K\\x00K\\x00e\\x8c\\twriteable\\x94NN\\x86\\x94u]\\x94(\\x8c\\x12numpy.core.numeric\\x94\\x8c\\x0b_frombuffer\\x94\\x93\\x94(C\\x00\\x94\\x8c\\x05numpy\\x94hB\\x93\\x94\\x8c\\x02u1\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01|\\x94NN
NJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94bK\\x00\\x85\\x94\\x8c\\x01C\\x94t\\x94R\\x94he(C\\x00\\x94hkK\\x00\\x85\\x94hot\\x94R\\x94e\\x86\\x94R\\x94ee\\x86\\x94t\\x94\\x8c\\x13__dask_blockwise__2\\x94eh\\x1b]\\x94(]\\x94(h*h\\x13\\x8c\\x06rename\\x94\\x85\\x94R\\x94e]\\x94(h/h2h5h6\\x86\\x94R\\x94}\\x94(h:C0\\x80\\x04\\x95%\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x13cudf.core.dataframe\\x94\\x8c\\tDataFrame\\x94\\x93\\x94.\\x94h}\\x94(h@C=\\x80\\x04\\x952\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x1acudf.core.column.numerical\\x94\\x8c\\x0fNumericalColumn\\x94\\x93\\x94.\\x94hBCB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i4\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94hD\\x89hE}\\x94(hGCI\\x80\\x04\\x95>\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c!cudf.core.buffer.spillable_buffer\\x94\\x8c\\x14SpillableBufferSlice\\x94\\x93\\x94.\\x94hIK\\x01uhMK\\x00hIK\\x01u\\x85\\x94hO}\\x94(hQ}\\x94(hSK\\x00hTK\\x00hUK\\x01uhVC\\x04\\x80\\x04N.\\x94hBCB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i8\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94hYC-\\x80\\x04\\x95\"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x0fcudf.core.index\\x94\\x8c\\nRangeIndex\\x94\\x93\\x94.\\x94hIK\\x00uh[K\\x00h\\\\]\\x94\\x88ah^]\\x94K\\x00ah`N\\x85\\x94u]\\x94he(C\\x00\\x94hkK\\x00\\x85\\x94hot\\x94R\\x94a\\x86\\x94R\\x94e]\\x94(h>h\\x1b]\\x94]\\x94(\\x8c\\x03src\\x94h\\x9eea\\x86\\x94ee\\x86\\x94t\\x94h\\x05(h\\x11h!\\x8c\\x10_reduction_chunk\\x94\\x93\\x94]\\x94h\\x0b(\\x8c\\x16dask.dataframe.methods\\x94\\x8c\\x06assign\\x94\\x93\\x94h\\x06h\\rh\\x08t\\x94h&\\x87\\x94ah\\x1b]\\x94]\\x94(\\x8c\\taca_chunk\\x94h0\\x8c\\x03len\\x94\\x93\\x94ea\\x86\\x94t\\x94\\x8c\\x13__dask_blockwise__0\\x94h\\x9e\\x8c\\x13__dask_blockwise__1\\x94\\x8c\\x03dst\\x94\\x8c\\x13__dask_blockwise__2\\x94N\\x8c\\x13__dask_blockwise__3\\x94\\x8c)to_frame-804980ae30b71d28f0a6bd3d5b7610f9\\x94\\x8c\\x13__dask_blockwise__4\\x94\\x8c(getitem-15414b72be12e28054238b44933937ab\\x94\\x8c\\x13__dask_blockwise__6\\x94\\x8c3cudf-aggregate-agg-c50c2d97de169ca4f41e43a92a042630\\x94uh\\x04\\x8c\\x13__dask_blockwise__5\\x94\\x85\\x94\\x8c6subgraph_callable-b4ca530e-8895-432e-b553-40a7b5892ab2\\x94t\\x94R\\x94.'\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2244, in execute\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2244, in execute\n", " function, args, kwargs = await self._maybe_deserialize_task(ts)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2216, in _maybe_deserialize_task\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2216, in _maybe_deserialize_task\n", " function, args, kwargs = _deserialize(*ts.run_spec)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File 
\"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2937, in _deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2937, in _deserialize\n", " function = loads_function(function)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2925, in loads_function\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2925, in loads_function\n", " result = pickle.loads(bytes_object)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/protocol/pickle.py\", line 96, in loads\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/protocol/pickle.py\", line 96, in loads\n", " return pickle.loads(x)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 176, in host_deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 176, in host_deserialize\n", " obj = cls.device_deserialize(header, frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 130, in device_deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 130, in device_deserialize\n", " return typ.deserialize(header, frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/dataframe.py\", line 1019, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/dataframe.py\", line 1019, in deserialize\n", " obj = super().deserialize(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 106, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 106, in deserialize\n", " columns = deserialize_columns(header[\"columns\"], frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 2450, in deserialize_columns\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 2450, in deserialize_columns\n", " colobj = col_typ.deserialize(meta, frames[:col_frame_count])\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1216, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1216, in deserialize\n", " data, frames = unpack(header[\"data\"], frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1204, in unpack\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1204, in unpack\n", " obj = klass.deserialize(header, frames[:count])\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 574, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 574, in 
deserialize\n", " return SpillableBuffer.deserialize(header, frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/buffer.py\", line 335, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/buffer.py\", line 335, in deserialize\n", " return cls._from_device_memory(frame)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 235, in _from_device_memory\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 235, in _from_device_memory\n", " ret._finalize_init(ptr_desc={\"type\": \"gpu\"}, exposed=exposed)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 206, in _finalize_init\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 206, in _finalize_init\n", " raise ValueError(\n", "ValueError: cannot create without a global spill manager\n", "2023-05-12 09:25:03,817 - distributed.worker - ERROR - Could not deserialize task ('len-chunk-319fe46af5510615b2fae86c6e732896-841a12bf4568ebb80eb2030cc4d9651d', 0)\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2923, in loads_function\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2923, in loads_function\n", " result = cache_loads[bytes_object]\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/collections.py\", line 24, in __getitem__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/collections.py\", line 24, in __getitem__\n", " value = super().__getitem__(key)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/collections/__init__.py\", line 1106, in __getitem__\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/collections/__init__.py\", line 1106, in __getitem__\n", " raise KeyError(key)\n", "KeyError: 
b'\\x80\\x05\\x95>\\x0b\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x11dask.optimization\\x94\\x8c\\x10SubgraphCallable\\x94\\x93\\x94(}\\x94(\\x8cKlen-chunk-319fe46af5510615b2fae86c6e732896-841a12bf4568ebb80eb2030cc4d9651d\\x94\\x8cZassign-getitem-len-chunk-319fe46af5510615b2fae86c6e732896-841a12bf4568ebb80eb2030cc4d9651d\\x94\\x8c*rename-01db283bd79fee66f232920c8dc6b55e_.0\\x94\\x8c;getitem-to_frame-rename-01db283bd79fee66f232920c8dc6b55e_.0\\x94\\x8c+getitem-3499fd71ac25ebbc1a06991edea6067c_.0\\x94\\x8c\\t_operator\\x94\\x8c\\x07getitem\\x94\\x93\\x94\\x8c/reset_index-f4c18304ca92859ccd09f44cf89b4b43_.0\\x94\\x8c\\x13__dask_blockwise__1\\x94\\x87\\x94h\\x0c(\\x8c\\ndask.utils\\x94\\x8c\\x05apply\\x94\\x93\\x94h\\x0f\\x8c\\x0cmethodcaller\\x94\\x93\\x94\\x8c\\x0breset_index\\x94\\x85\\x94R\\x94]\\x94\\x8c\\x13__dask_blockwise__5\\x94a\\x8c\\x08builtins\\x94\\x8c\\x04dict\\x94\\x93\\x94]\\x94]\\x94(\\x8c\\x04drop\\x94\\x89ea\\x86\\x94t\\x94h\\x07(h\\x11\\x8c\\x13dask.dataframe.core\\x94\\x8c\\x11apply_and_enforce\\x94\\x93\\x94]\\x94((h\\x11h#]\\x94h\\x0bh\\x0c\\x8c\\x13__dask_blockwise__0\\x94\\x87\\x94ah\\x1b]\\x94(]\\x94(\\x8c\\x05_func\\x94h\\x13\\x8c\\x08to_frame\\x94\\x85\\x94R\\x94e]\\x94(\\x8c\\x05_meta\\x94\\x8c\\x08builtins\\x94\\x8c\\x07getattr\\x94\\x93\\x94\\x8c\\x13cudf.core.dataframe\\x94\\x8c\\tDataFrame\\x94\\x93\\x94\\x8c\\x10host_deserialize\\x94\\x86\\x94R\\x94}\\x94(\\x8c\\x0ftype-serialized\\x94C0\\x80\\x04\\x95%\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x13cudf.core.dataframe\\x94\\x8c\\tDataFrame\\x94\\x93\\x94.\\x94\\x8c\\x0ccolumn_names\\x94C\\x14\\x80\\x04\\x95\\t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x03src\\x94\\x85\\x94.\\x94\\x8c\\x07columns\\x94}\\x94(\\x8c\\x0ftype-serialized\\x94C=\\x80\\x04\\x952\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x1acudf.core.column.numerical\\x94\\x8c\\x0fNumericalColumn\\x94\\x93\\x94.\\x94\\x8c\\x05dtype\\x94CB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i4\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94\\x8c\\x18dtype-is-cudf-serialized\\x94\\x89\\x8c\\x04data\\x94}\\x94(\\x8c\\x0ftype-serialized\\x94CI\\x80\\x04\\x95>\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c!cudf.core.buffer.spillable_buffer\\x94\\x8c\\x14SpillableBufferSlice\\x94\\x93\\x94.\\x94\\x8c\\x0bframe_count\\x94K\\x01u\\x8c\\x04mask\\x94}\\x94(hGCD\\x80\\x04\\x959\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c!cudf.core.buffer.spillable_buffer\\x94\\x8c\\x0fSpillableBuffer\\x94\\x93\\x94.\\x94hIK\\x01u\\x8c\\x04size\\x94K\\x00hIK\\x02u\\x85\\x94\\x8c\\x05index\\x94}\\x94(\\x8c\\x0cindex_column\\x94}\\x94(\\x8c\\x05start\\x94K\\x00\\x8c\\x04stop\\x94K\\x00\\x8c\\x04step\\x94K\\x01u\\x8c\\x04name\\x94C\\x04\\x80\\x04N.\\x94hBCB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i8\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94\\x8c\\x0ftype-serialized\\x94C-\\x80\\x04\\x95\"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x0fcudf.core.index\\x94\\x8c\\nRangeIndex\\x94\\x93\\x94.\\x94hIK\\x00u\\x8c\\x11index_frame_count\\x94K\\x00\\x8c\\x07is-cuda\\x94]\\x94(\\x88\\x88e\\x8c\\x07lengths\\x94]\\x94(K\\x00K\\x00e\\x8c\\twriteable\\x94NN\\x86\\x94u]\\x94(\\x8c\\x12numpy.core.numeric\\x94\\x8c\\x0b_frombuffer\\x94\\x93\\x94(C\\x00\\x94\\x8c\\x05numpy\\x94hB\\x93\\x94\\x8c\\x02u1\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01|\\x94NN
NJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94bK\\x00\\x85\\x94\\x8c\\x01C\\x94t\\x94R\\x94he(C\\x00\\x94hkK\\x00\\x85\\x94hot\\x94R\\x94e\\x86\\x94R\\x94ee\\x86\\x94t\\x94\\x8c\\x13__dask_blockwise__2\\x94eh\\x1b]\\x94(]\\x94(h*h\\x13\\x8c\\x06rename\\x94\\x85\\x94R\\x94e]\\x94(h/h2h5h6\\x86\\x94R\\x94}\\x94(h:C0\\x80\\x04\\x95%\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x13cudf.core.dataframe\\x94\\x8c\\tDataFrame\\x94\\x93\\x94.\\x94h}\\x94(h@C=\\x80\\x04\\x952\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x1acudf.core.column.numerical\\x94\\x8c\\x0fNumericalColumn\\x94\\x93\\x94.\\x94hBCB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i4\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94hD\\x89hE}\\x94(hGCI\\x80\\x04\\x95>\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c!cudf.core.buffer.spillable_buffer\\x94\\x8c\\x14SpillableBufferSlice\\x94\\x93\\x94.\\x94hIK\\x01uhMK\\x00hIK\\x01u\\x85\\x94hO}\\x94(hQ}\\x94(hSK\\x00hTK\\x00hUK\\x01uhVC\\x04\\x80\\x04N.\\x94hBCB\\x80\\x04\\x957\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x05numpy\\x94\\x8c\\x05dtype\\x94\\x93\\x94\\x8c\\x02i8\\x94\\x89\\x88\\x87\\x94R\\x94(K\\x03\\x8c\\x01<\\x94NNNJ\\xff\\xff\\xff\\xffJ\\xff\\xff\\xff\\xffK\\x00t\\x94b.\\x94hYC-\\x80\\x04\\x95\"\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8c\\x0fcudf.core.index\\x94\\x8c\\nRangeIndex\\x94\\x93\\x94.\\x94hIK\\x00uh[K\\x00h\\\\]\\x94\\x88ah^]\\x94K\\x00ah`N\\x85\\x94u]\\x94he(C\\x00\\x94hkK\\x00\\x85\\x94hot\\x94R\\x94a\\x86\\x94R\\x94e]\\x94(h>h\\x1b]\\x94]\\x94(\\x8c\\x03src\\x94h\\x9eea\\x86\\x94ee\\x86\\x94t\\x94h\\x05(h\\x11h!\\x8c\\x10_reduction_chunk\\x94\\x93\\x94]\\x94h\\x0b(\\x8c\\x16dask.dataframe.methods\\x94\\x8c\\x06assign\\x94\\x93\\x94h\\x06h\\rh\\x08t\\x94h&\\x87\\x94ah\\x1b]\\x94]\\x94(\\x8c\\taca_chunk\\x94h0\\x8c\\x03len\\x94\\x93\\x94ea\\x86\\x94t\\x94\\x8c\\x13__dask_blockwise__0\\x94h\\x9e\\x8c\\x13__dask_blockwise__1\\x94\\x8c\\x03dst\\x94\\x8c\\x13__dask_blockwise__2\\x94N\\x8c\\x13__dask_blockwise__3\\x94\\x8c)to_frame-804980ae30b71d28f0a6bd3d5b7610f9\\x94\\x8c\\x13__dask_blockwise__4\\x94\\x8c(getitem-15414b72be12e28054238b44933937ab\\x94\\x8c\\x13__dask_blockwise__6\\x94\\x8c3cudf-aggregate-agg-c50c2d97de169ca4f41e43a92a042630\\x94uh\\x04\\x8c\\x13__dask_blockwise__5\\x94\\x85\\x94\\x8c6subgraph_callable-b4ca530e-8895-432e-b553-40a7b5892ab2\\x94t\\x94R\\x94.'\n", "\n", "During handling of the above exception, another exception occurred:\n", "\n", "Traceback (most recent call last):\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2244, in execute\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2244, in execute\n", " function, args, kwargs = await self._maybe_deserialize_task(ts)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2216, in _maybe_deserialize_task\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2216, in _maybe_deserialize_task\n", " function, args, kwargs = _deserialize(*ts.run_spec)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py\", line 79, in inner\n", " return func(*args, **kwds)\n", - " File 
\"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2937, in _deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2937, in _deserialize\n", " function = loads_function(function)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2925, in loads_function\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py\", line 2925, in loads_function\n", " result = pickle.loads(bytes_object)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/protocol/pickle.py\", line 96, in loads\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/protocol/pickle.py\", line 96, in loads\n", " return pickle.loads(x)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 176, in host_deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 176, in host_deserialize\n", " obj = cls.device_deserialize(header, frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 130, in device_deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py\", line 130, in device_deserialize\n", " return typ.deserialize(header, frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/dataframe.py\", line 1019, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/dataframe.py\", line 1019, in deserialize\n", " obj = super().deserialize(\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 106, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py\", line 106, in deserialize\n", " columns = deserialize_columns(header[\"columns\"], frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 2450, in deserialize_columns\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 2450, in deserialize_columns\n", " colobj = col_typ.deserialize(meta, frames[:col_frame_count])\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1216, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1216, in deserialize\n", " data, frames = unpack(header[\"data\"], frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1204, in unpack\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py\", line 1204, in unpack\n", " obj = klass.deserialize(header, frames[:count])\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 574, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 574, in 
deserialize\n", " return SpillableBuffer.deserialize(header, frames)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/buffer.py\", line 335, in deserialize\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/buffer.py\", line 335, in deserialize\n", " return cls._from_device_memory(frame)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 235, in _from_device_memory\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 235, in _from_device_memory\n", " ret._finalize_init(ptr_desc={\"type\": \"gpu\"}, exposed=exposed)\n", - " File \"/home/dacosta/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 206, in _finalize_init\n", + " File \"/home/dacosta/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py\", line 206, in _finalize_init\n", " raise ValueError(\n", "ValueError: cannot create without a global spill manager\n" ] @@ -475,34 +475,34 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[6], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39m# Create a directed graph using the source (src) and destination (dst) vertex pairs from the Dataframe \u001b[39;00m\n\u001b[1;32m 2\u001b[0m G \u001b[39m=\u001b[39m cugraph\u001b[39m.\u001b[39mGraph(directed\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[0;32m----> 3\u001b[0m G\u001b[39m.\u001b[39;49mfrom_dask_cudf_edgelist(e_list, source\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39msrc\u001b[39;49m\u001b[39m'\u001b[39;49m, destination\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mdst\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m 5\u001b[0m \u001b[39m# Print time\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mRead, load and renumber: \u001b[39m\u001b[39m\"\u001b[39m, time\u001b[39m.\u001b[39mtime()\u001b[39m-\u001b[39mt_start, \u001b[39m\"\u001b[39m\u001b[39ms\u001b[39m\u001b[39m\"\u001b[39m)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cugraph/structure/graph_classes.py:309\u001b[0m, in \u001b[0;36mGraph.from_dask_cudf_edgelist\u001b[0;34m(self, input_ddf, source, destination, edge_attr, renumber, store_transposed, legacy_renum_only)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_Impl\u001b[39m.\u001b[39medgelist \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 308\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mGraph already has values\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 309\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_Impl\u001b[39m.\u001b[39;49m_simpleDistributedGraphImpl__from_edgelist(\n\u001b[1;32m 310\u001b[0m input_ddf,\n\u001b[1;32m 311\u001b[0m source,\n\u001b[1;32m 312\u001b[0m destination,\n\u001b[1;32m 313\u001b[0m edge_attr,\n\u001b[1;32m 314\u001b[0m renumber,\n\u001b[1;32m 315\u001b[0m store_transposed,\n\u001b[1;32m 316\u001b[0m legacy_renum_only,\n\u001b[1;32m 317\u001b[0m )\n", - "File 
\u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cugraph/structure/graph_implementation/simpleDistributedGraph.py:272\u001b[0m, in \u001b[0;36msimpleDistributedGraphImpl.__from_edgelist\u001b[0;34m(self, input_ddf, source, destination, edge_attr, renumber, store_transposed, legacy_renum_only)\u001b[0m\n\u001b[1;32m 268\u001b[0m dst_col_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrenumber_map\u001b[39m.\u001b[39mrenumbered_dst_col_name\n\u001b[1;32m 270\u001b[0m ddf \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39medgelist\u001b[39m.\u001b[39medgelist_df\n\u001b[0;32m--> 272\u001b[0m num_edges \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39;49m(ddf)\n\u001b[1;32m 273\u001b[0m edge_data \u001b[39m=\u001b[39m get_distributed_data(ddf)\n\u001b[1;32m 275\u001b[0m graph_props \u001b[39m=\u001b[39m GraphProperties(\n\u001b[1;32m 276\u001b[0m is_multigraph\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproperties\u001b[39m.\u001b[39mmulti_edge,\n\u001b[1;32m 277\u001b[0m is_symmetric\u001b[39m=\u001b[39m\u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproperties\u001b[39m.\u001b[39mdirected,\n\u001b[1;32m 278\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/dataframe/core.py:4775\u001b[0m, in \u001b[0;36mDataFrame.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 4773\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__len__\u001b[39m()\n\u001b[1;32m 4774\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 4775\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mlen\u001b[39;49m(s)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/dataframe/core.py:843\u001b[0m, in \u001b[0;36m_Frame.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 840\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 841\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreduction(\n\u001b[1;32m 842\u001b[0m \u001b[39mlen\u001b[39;49m, np\u001b[39m.\u001b[39;49msum, token\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mlen\u001b[39;49m\u001b[39m\"\u001b[39;49m, meta\u001b[39m=\u001b[39;49m\u001b[39mint\u001b[39;49m, split_every\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m\n\u001b[0;32m--> 843\u001b[0m )\u001b[39m.\u001b[39;49mcompute()\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/base.py:314\u001b[0m, in \u001b[0;36mDaskMethodsMixin.compute\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcompute\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 291\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Compute this dask collection\u001b[39;00m\n\u001b[1;32m 292\u001b[0m \n\u001b[1;32m 293\u001b[0m \u001b[39m This turns a lazy Dask collection into its in-memory equivalent.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[39m dask.base.compute\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m (result,) \u001b[39m=\u001b[39m compute(\u001b[39mself\u001b[39;49m, traverse\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 315\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", - "File 
\u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/dask/base.py:599\u001b[0m, in \u001b[0;36mcompute\u001b[0;34m(traverse, optimize_graph, scheduler, get, *args, **kwargs)\u001b[0m\n\u001b[1;32m 596\u001b[0m keys\u001b[39m.\u001b[39mappend(x\u001b[39m.\u001b[39m__dask_keys__())\n\u001b[1;32m 597\u001b[0m postcomputes\u001b[39m.\u001b[39mappend(x\u001b[39m.\u001b[39m__dask_postcompute__())\n\u001b[0;32m--> 599\u001b[0m results \u001b[39m=\u001b[39m schedule(dsk, keys, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 600\u001b[0m \u001b[39mreturn\u001b[39;00m repack([f(r, \u001b[39m*\u001b[39ma) \u001b[39mfor\u001b[39;00m r, (f, a) \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(results, postcomputes)])\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/client.py:3186\u001b[0m, in \u001b[0;36mClient.get\u001b[0;34m(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)\u001b[0m\n\u001b[1;32m 3184\u001b[0m should_rejoin \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m 3185\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3186\u001b[0m results \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgather(packed, asynchronous\u001b[39m=\u001b[39;49masynchronous, direct\u001b[39m=\u001b[39;49mdirect)\n\u001b[1;32m 3187\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 3188\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m futures\u001b[39m.\u001b[39mvalues():\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/client.py:2345\u001b[0m, in \u001b[0;36mClient.gather\u001b[0;34m(self, futures, errors, direct, asynchronous)\u001b[0m\n\u001b[1;32m 2343\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 2344\u001b[0m local_worker \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 2345\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msync(\n\u001b[1;32m 2346\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_gather,\n\u001b[1;32m 2347\u001b[0m futures,\n\u001b[1;32m 2348\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[1;32m 2349\u001b[0m direct\u001b[39m=\u001b[39;49mdirect,\n\u001b[1;32m 2350\u001b[0m local_worker\u001b[39m=\u001b[39;49mlocal_worker,\n\u001b[1;32m 2351\u001b[0m asynchronous\u001b[39m=\u001b[39;49masynchronous,\n\u001b[1;32m 2352\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/utils.py:349\u001b[0m, in \u001b[0;36mSyncMethodMixin.sync\u001b[0;34m(self, func, asynchronous, callback_timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 347\u001b[0m \u001b[39mreturn\u001b[39;00m future\n\u001b[1;32m 348\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 349\u001b[0m \u001b[39mreturn\u001b[39;00m sync(\n\u001b[1;32m 350\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mloop, func, \u001b[39m*\u001b[39;49margs, callback_timeout\u001b[39m=\u001b[39;49mcallback_timeout, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 351\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/utils.py:416\u001b[0m, in \u001b[0;36msync\u001b[0;34m(loop, func, callback_timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[39mif\u001b[39;00m error:\n\u001b[1;32m 415\u001b[0m typ, exc, tb \u001b[39m=\u001b[39m 
error\n\u001b[0;32m--> 416\u001b[0m \u001b[39mraise\u001b[39;00m exc\u001b[39m.\u001b[39mwith_traceback(tb)\n\u001b[1;32m 417\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 418\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/utils.py:389\u001b[0m, in \u001b[0;36msync..f\u001b[0;34m()\u001b[0m\n\u001b[1;32m 387\u001b[0m future \u001b[39m=\u001b[39m wait_for(future, callback_timeout)\n\u001b[1;32m 388\u001b[0m future \u001b[39m=\u001b[39m asyncio\u001b[39m.\u001b[39mensure_future(future)\n\u001b[0;32m--> 389\u001b[0m result \u001b[39m=\u001b[39m \u001b[39myield\u001b[39;00m future\n\u001b[1;32m 390\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 391\u001b[0m error \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/tornado/gen.py:769\u001b[0m, in \u001b[0;36mRunner.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 766\u001b[0m exc_info \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 768\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 769\u001b[0m value \u001b[39m=\u001b[39m future\u001b[39m.\u001b[39;49mresult()\n\u001b[1;32m 770\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 771\u001b[0m exc_info \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/client.py:2208\u001b[0m, in \u001b[0;36mClient._gather\u001b[0;34m(self, futures, errors, direct, local_worker)\u001b[0m\n\u001b[1;32m 2206\u001b[0m exc \u001b[39m=\u001b[39m CancelledError(key)\n\u001b[1;32m 2207\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 2208\u001b[0m \u001b[39mraise\u001b[39;00m exception\u001b[39m.\u001b[39mwith_traceback(traceback)\n\u001b[1;32m 2209\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[1;32m 2210\u001b[0m \u001b[39mif\u001b[39;00m errors \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mskip\u001b[39m\u001b[39m\"\u001b[39m:\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/contextlib.py:79\u001b[0m, in \u001b[0;36minner\u001b[0;34m()\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[39m@wraps\u001b[39m(func)\n\u001b[1;32m 77\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minner\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds):\n\u001b[1;32m 78\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_recreate_cm():\n\u001b[0;32m---> 79\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py:2937\u001b[0m, in \u001b[0;36m_deserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2934\u001b[0m \u001b[39m# Some objects require threadlocal state during deserialization, e.g. 
to\u001b[39;00m\n\u001b[1;32m 2935\u001b[0m \u001b[39m# detect the current worker\u001b[39;00m\n\u001b[1;32m 2936\u001b[0m \u001b[39mif\u001b[39;00m function \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 2937\u001b[0m function \u001b[39m=\u001b[39m loads_function(function)\n\u001b[1;32m 2938\u001b[0m \u001b[39mif\u001b[39;00m args \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(args, \u001b[39mbytes\u001b[39m):\n\u001b[1;32m 2939\u001b[0m args \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(args)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py:2925\u001b[0m, in \u001b[0;36mloads_function\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2923\u001b[0m result \u001b[39m=\u001b[39m cache_loads[bytes_object]\n\u001b[1;32m 2924\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[0;32m-> 2925\u001b[0m result \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(bytes_object)\n\u001b[1;32m 2926\u001b[0m cache_loads[bytes_object] \u001b[39m=\u001b[39m result\n\u001b[1;32m 2927\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/distributed/protocol/pickle.py:96\u001b[0m, in \u001b[0;36mloads\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[39mreturn\u001b[39;00m pickle\u001b[39m.\u001b[39mloads(x, buffers\u001b[39m=\u001b[39mbuffers)\n\u001b[1;32m 95\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 96\u001b[0m \u001b[39mreturn\u001b[39;00m pickle\u001b[39m.\u001b[39mloads(x)\n\u001b[1;32m 97\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 98\u001b[0m logger\u001b[39m.\u001b[39minfo(\u001b[39m\"\u001b[39m\u001b[39mFailed to deserialize \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m, x[:\u001b[39m10000\u001b[39m], exc_info\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py:176\u001b[0m, in \u001b[0;36mhost_deserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Perform device-side deserialization tasks.\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \n\u001b[1;32m 156\u001b[0m \u001b[39mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[39m:meta private:\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 172\u001b[0m frames \u001b[39m=\u001b[39m [\n\u001b[1;32m 173\u001b[0m cudf\u001b[39m.\u001b[39mcore\u001b[39m.\u001b[39mbuffer\u001b[39m.\u001b[39mas_buffer(f) \u001b[39mif\u001b[39;00m c \u001b[39melse\u001b[39;00m f\n\u001b[1;32m 174\u001b[0m \u001b[39mfor\u001b[39;00m c, f \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(header[\u001b[39m\"\u001b[39m\u001b[39mis-cuda\u001b[39m\u001b[39m\"\u001b[39m], \u001b[39mmap\u001b[39m(\u001b[39mmemoryview\u001b[39m, frames))\n\u001b[1;32m 175\u001b[0m ]\n\u001b[0;32m--> 176\u001b[0m obj \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mdevice_deserialize(header, frames)\n\u001b[1;32m 177\u001b[0m \u001b[39mreturn\u001b[39;00m obj\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py:130\u001b[0m, in \u001b[0;36mdevice_deserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 125\u001b[0m typ \u001b[39m=\u001b[39m 
pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 126\u001b[0m frames \u001b[39m=\u001b[39m [\n\u001b[1;32m 127\u001b[0m cudf\u001b[39m.\u001b[39mcore\u001b[39m.\u001b[39mbuffer\u001b[39m.\u001b[39mas_buffer(f) \u001b[39mif\u001b[39;00m c \u001b[39melse\u001b[39;00m \u001b[39mmemoryview\u001b[39m(f)\n\u001b[1;32m 128\u001b[0m \u001b[39mfor\u001b[39;00m c, f \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(header[\u001b[39m\"\u001b[39m\u001b[39mis-cuda\u001b[39m\u001b[39m\"\u001b[39m], frames)\n\u001b[1;32m 129\u001b[0m ]\n\u001b[0;32m--> 130\u001b[0m \u001b[39mreturn\u001b[39;00m typ\u001b[39m.\u001b[39mdeserialize(header, frames)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/dataframe.py:1019\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1016\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 1017\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize\u001b[39m(\u001b[39mcls\u001b[39m, header, frames):\n\u001b[1;32m 1018\u001b[0m index_nframes \u001b[39m=\u001b[39m header[\u001b[39m\"\u001b[39m\u001b[39mindex_frame_count\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m-> 1019\u001b[0m obj \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mdeserialize(\n\u001b[1;32m 1020\u001b[0m header, frames[header[\u001b[39m\"\u001b[39m\u001b[39mindex_frame_count\u001b[39m\u001b[39m\"\u001b[39m] :]\n\u001b[1;32m 1021\u001b[0m )\n\u001b[1;32m 1023\u001b[0m idx_typ \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 1024\u001b[0m index \u001b[39m=\u001b[39m idx_typ\u001b[39m.\u001b[39mdeserialize(header[\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m], frames[:index_nframes])\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py:106\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 104\u001b[0m cls_deserialize \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 105\u001b[0m column_names \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mcolumn_names\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m--> 106\u001b[0m columns \u001b[39m=\u001b[39m deserialize_columns(header[\u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m], frames)\n\u001b[1;32m 107\u001b[0m \u001b[39mreturn\u001b[39;00m cls_deserialize\u001b[39m.\u001b[39m_from_data(\u001b[39mdict\u001b[39m(\u001b[39mzip\u001b[39m(column_names, columns)))\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py:2450\u001b[0m, in \u001b[0;36mdeserialize_columns\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2448\u001b[0m col_frame_count \u001b[39m=\u001b[39m meta[\u001b[39m\"\u001b[39m\u001b[39mframe_count\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 2449\u001b[0m col_typ \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(meta[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m-> 2450\u001b[0m colobj \u001b[39m=\u001b[39m col_typ\u001b[39m.\u001b[39mdeserialize(meta, frames[:col_frame_count])\n\u001b[1;32m 2451\u001b[0m 
columns\u001b[39m.\u001b[39mappend(colobj)\n\u001b[1;32m 2452\u001b[0m \u001b[39m# Advance frames\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py:1216\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1214\u001b[0m dtype \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 1215\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mdata\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m header:\n\u001b[0;32m-> 1216\u001b[0m data, frames \u001b[39m=\u001b[39m unpack(header[\u001b[39m\"\u001b[39m\u001b[39mdata\u001b[39m\u001b[39m\"\u001b[39m], frames)\n\u001b[1;32m 1217\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1218\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py:1204\u001b[0m, in \u001b[0;36munpack\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1202\u001b[0m count \u001b[39m=\u001b[39m header[\u001b[39m\"\u001b[39m\u001b[39mframe_count\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 1203\u001b[0m klass \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m-> 1204\u001b[0m obj \u001b[39m=\u001b[39m klass\u001b[39m.\u001b[39mdeserialize(header, frames[:count])\n\u001b[1;32m 1205\u001b[0m \u001b[39mreturn\u001b[39;00m obj, frames[count:]\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py:574\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize\u001b[39m(\u001b[39mcls\u001b[39m, header: \u001b[39mdict\u001b[39m, frames: \u001b[39mlist\u001b[39m):\n\u001b[1;32m 569\u001b[0m \u001b[39m# TODO: because of the hack in `SpillableBuffer.serialize()` where\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 572\u001b[0m \u001b[39m# deserialize into `SpillableBufferSlice` when the frames hasn't been\u001b[39;00m\n\u001b[1;32m 573\u001b[0m \u001b[39m# copied.\u001b[39;00m\n\u001b[0;32m--> 574\u001b[0m \u001b[39mreturn\u001b[39;00m SpillableBuffer\u001b[39m.\u001b[39mdeserialize(header, frames)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/buffer.py:335\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[39mreturn\u001b[39;00m frame \u001b[39m# The frame is already deserialized\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(frame, \u001b[39m\"\u001b[39m\u001b[39m__cuda_array_interface__\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m--> 335\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_from_device_memory(frame)\n\u001b[1;32m 336\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_from_host_memory(frame)\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py:235\u001b[0m, in \u001b[0;36m_from_device_memory\u001b[0;34m()\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Create a spillabe buffer from device memory.\u001b[39;00m\n\u001b[1;32m 219\u001b[0m 
\n\u001b[1;32m 220\u001b[0m \u001b[39mNo data is being copied.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[39m Buffer representing the same device memory as `data`\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 234\u001b[0m ret \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m_from_device_memory(data)\n\u001b[0;32m--> 235\u001b[0m ret\u001b[39m.\u001b[39m_finalize_init(ptr_desc\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mtype\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mgpu\u001b[39m\u001b[39m\"\u001b[39m}, exposed\u001b[39m=\u001b[39mexposed)\n\u001b[1;32m 236\u001b[0m \u001b[39mreturn\u001b[39;00m ret\n", - "File \u001b[0;32m~/miniconda3/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py:206\u001b[0m, in \u001b[0;36m_finalize_init\u001b[0;34m()\u001b[0m\n\u001b[1;32m 204\u001b[0m manager \u001b[39m=\u001b[39m get_global_manager()\n\u001b[1;32m 205\u001b[0m \u001b[39mif\u001b[39;00m manager \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 206\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 207\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcannot create \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m without \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 208\u001b[0m \u001b[39m\"\u001b[39m\u001b[39ma global spill manager\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 209\u001b[0m )\n\u001b[1;32m 211\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_manager \u001b[39m=\u001b[39m manager\n\u001b[1;32m 212\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_manager\u001b[39m.\u001b[39madd(\u001b[39mself\u001b[39m)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cugraph/structure/graph_classes.py:309\u001b[0m, in \u001b[0;36mGraph.from_dask_cudf_edgelist\u001b[0;34m(self, input_ddf, source, destination, edge_attr, renumber, store_transposed, legacy_renum_only)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_Impl\u001b[39m.\u001b[39medgelist \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 308\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mGraph already has values\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 309\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_Impl\u001b[39m.\u001b[39;49m_simpleDistributedGraphImpl__from_edgelist(\n\u001b[1;32m 310\u001b[0m input_ddf,\n\u001b[1;32m 311\u001b[0m source,\n\u001b[1;32m 312\u001b[0m destination,\n\u001b[1;32m 313\u001b[0m edge_attr,\n\u001b[1;32m 314\u001b[0m renumber,\n\u001b[1;32m 315\u001b[0m store_transposed,\n\u001b[1;32m 316\u001b[0m legacy_renum_only,\n\u001b[1;32m 317\u001b[0m )\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cugraph/structure/graph_implementation/simpleDistributedGraph.py:272\u001b[0m, in \u001b[0;36msimpleDistributedGraphImpl.__from_edgelist\u001b[0;34m(self, input_ddf, source, destination, edge_attr, renumber, store_transposed, legacy_renum_only)\u001b[0m\n\u001b[1;32m 268\u001b[0m dst_col_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrenumber_map\u001b[39m.\u001b[39mrenumbered_dst_col_name\n\u001b[1;32m 270\u001b[0m 
ddf \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39medgelist\u001b[39m.\u001b[39medgelist_df\n\u001b[0;32m--> 272\u001b[0m num_edges \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39;49m(ddf)\n\u001b[1;32m 273\u001b[0m edge_data \u001b[39m=\u001b[39m get_distributed_data(ddf)\n\u001b[1;32m 275\u001b[0m graph_props \u001b[39m=\u001b[39m GraphProperties(\n\u001b[1;32m 276\u001b[0m is_multigraph\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproperties\u001b[39m.\u001b[39mmulti_edge,\n\u001b[1;32m 277\u001b[0m is_symmetric\u001b[39m=\u001b[39m\u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproperties\u001b[39m.\u001b[39mdirected,\n\u001b[1;32m 278\u001b[0m )\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/dataframe/core.py:4775\u001b[0m, in \u001b[0;36mDataFrame.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 4773\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__len__\u001b[39m()\n\u001b[1;32m 4774\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 4775\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mlen\u001b[39;49m(s)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/dataframe/core.py:843\u001b[0m, in \u001b[0;36m_Frame.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 840\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 841\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mreduction(\n\u001b[1;32m 842\u001b[0m \u001b[39mlen\u001b[39;49m, np\u001b[39m.\u001b[39;49msum, token\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mlen\u001b[39;49m\u001b[39m\"\u001b[39;49m, meta\u001b[39m=\u001b[39;49m\u001b[39mint\u001b[39;49m, split_every\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m\n\u001b[0;32m--> 843\u001b[0m )\u001b[39m.\u001b[39;49mcompute()\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/base.py:314\u001b[0m, in \u001b[0;36mDaskMethodsMixin.compute\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcompute\u001b[39m(\u001b[39mself\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 291\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Compute this dask collection\u001b[39;00m\n\u001b[1;32m 292\u001b[0m \n\u001b[1;32m 293\u001b[0m \u001b[39m This turns a lazy Dask collection into its in-memory equivalent.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[39m dask.base.compute\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m (result,) \u001b[39m=\u001b[39m compute(\u001b[39mself\u001b[39;49m, traverse\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 315\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/dask/base.py:599\u001b[0m, in \u001b[0;36mcompute\u001b[0;34m(traverse, optimize_graph, scheduler, get, *args, **kwargs)\u001b[0m\n\u001b[1;32m 596\u001b[0m keys\u001b[39m.\u001b[39mappend(x\u001b[39m.\u001b[39m__dask_keys__())\n\u001b[1;32m 597\u001b[0m postcomputes\u001b[39m.\u001b[39mappend(x\u001b[39m.\u001b[39m__dask_postcompute__())\n\u001b[0;32m--> 599\u001b[0m results \u001b[39m=\u001b[39m schedule(dsk, keys, 
\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 600\u001b[0m \u001b[39mreturn\u001b[39;00m repack([f(r, \u001b[39m*\u001b[39ma) \u001b[39mfor\u001b[39;00m r, (f, a) \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(results, postcomputes)])\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/client.py:3186\u001b[0m, in \u001b[0;36mClient.get\u001b[0;34m(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)\u001b[0m\n\u001b[1;32m 3184\u001b[0m should_rejoin \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[1;32m 3185\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3186\u001b[0m results \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mgather(packed, asynchronous\u001b[39m=\u001b[39;49masynchronous, direct\u001b[39m=\u001b[39;49mdirect)\n\u001b[1;32m 3187\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 3188\u001b[0m \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m futures\u001b[39m.\u001b[39mvalues():\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/client.py:2345\u001b[0m, in \u001b[0;36mClient.gather\u001b[0;34m(self, futures, errors, direct, asynchronous)\u001b[0m\n\u001b[1;32m 2343\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 2344\u001b[0m local_worker \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m-> 2345\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msync(\n\u001b[1;32m 2346\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_gather,\n\u001b[1;32m 2347\u001b[0m futures,\n\u001b[1;32m 2348\u001b[0m errors\u001b[39m=\u001b[39;49merrors,\n\u001b[1;32m 2349\u001b[0m direct\u001b[39m=\u001b[39;49mdirect,\n\u001b[1;32m 2350\u001b[0m local_worker\u001b[39m=\u001b[39;49mlocal_worker,\n\u001b[1;32m 2351\u001b[0m asynchronous\u001b[39m=\u001b[39;49masynchronous,\n\u001b[1;32m 2352\u001b[0m )\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/utils.py:349\u001b[0m, in \u001b[0;36mSyncMethodMixin.sync\u001b[0;34m(self, func, asynchronous, callback_timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 347\u001b[0m \u001b[39mreturn\u001b[39;00m future\n\u001b[1;32m 348\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 349\u001b[0m \u001b[39mreturn\u001b[39;00m sync(\n\u001b[1;32m 350\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mloop, func, \u001b[39m*\u001b[39;49margs, callback_timeout\u001b[39m=\u001b[39;49mcallback_timeout, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs\n\u001b[1;32m 351\u001b[0m )\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/utils.py:416\u001b[0m, in \u001b[0;36msync\u001b[0;34m(loop, func, callback_timeout, *args, **kwargs)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[39mif\u001b[39;00m error:\n\u001b[1;32m 415\u001b[0m typ, exc, tb \u001b[39m=\u001b[39m error\n\u001b[0;32m--> 416\u001b[0m \u001b[39mraise\u001b[39;00m exc\u001b[39m.\u001b[39mwith_traceback(tb)\n\u001b[1;32m 417\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 418\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/utils.py:389\u001b[0m, in \u001b[0;36msync..f\u001b[0;34m()\u001b[0m\n\u001b[1;32m 387\u001b[0m future \u001b[39m=\u001b[39m wait_for(future, callback_timeout)\n\u001b[1;32m 388\u001b[0m 
future \u001b[39m=\u001b[39m asyncio\u001b[39m.\u001b[39mensure_future(future)\n\u001b[0;32m--> 389\u001b[0m result \u001b[39m=\u001b[39m \u001b[39myield\u001b[39;00m future\n\u001b[1;32m 390\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 391\u001b[0m error \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/tornado/gen.py:769\u001b[0m, in \u001b[0;36mRunner.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 766\u001b[0m exc_info \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 768\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 769\u001b[0m value \u001b[39m=\u001b[39m future\u001b[39m.\u001b[39;49mresult()\n\u001b[1;32m 770\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 771\u001b[0m exc_info \u001b[39m=\u001b[39m sys\u001b[39m.\u001b[39mexc_info()\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/client.py:2208\u001b[0m, in \u001b[0;36mClient._gather\u001b[0;34m(self, futures, errors, direct, local_worker)\u001b[0m\n\u001b[1;32m 2206\u001b[0m exc \u001b[39m=\u001b[39m CancelledError(key)\n\u001b[1;32m 2207\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 2208\u001b[0m \u001b[39mraise\u001b[39;00m exception\u001b[39m.\u001b[39mwith_traceback(traceback)\n\u001b[1;32m 2209\u001b[0m \u001b[39mraise\u001b[39;00m exc\n\u001b[1;32m 2210\u001b[0m \u001b[39mif\u001b[39;00m errors \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mskip\u001b[39m\u001b[39m\"\u001b[39m:\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/contextlib.py:79\u001b[0m, in \u001b[0;36minner\u001b[0;34m()\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[39m@wraps\u001b[39m(func)\n\u001b[1;32m 77\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minner\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds):\n\u001b[1;32m 78\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_recreate_cm():\n\u001b[0;32m---> 79\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py:2937\u001b[0m, in \u001b[0;36m_deserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2934\u001b[0m \u001b[39m# Some objects require threadlocal state during deserialization, e.g. 
to\u001b[39;00m\n\u001b[1;32m 2935\u001b[0m \u001b[39m# detect the current worker\u001b[39;00m\n\u001b[1;32m 2936\u001b[0m \u001b[39mif\u001b[39;00m function \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 2937\u001b[0m function \u001b[39m=\u001b[39m loads_function(function)\n\u001b[1;32m 2938\u001b[0m \u001b[39mif\u001b[39;00m args \u001b[39mand\u001b[39;00m \u001b[39misinstance\u001b[39m(args, \u001b[39mbytes\u001b[39m):\n\u001b[1;32m 2939\u001b[0m args \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(args)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/worker.py:2925\u001b[0m, in \u001b[0;36mloads_function\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2923\u001b[0m result \u001b[39m=\u001b[39m cache_loads[bytes_object]\n\u001b[1;32m 2924\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m:\n\u001b[0;32m-> 2925\u001b[0m result \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(bytes_object)\n\u001b[1;32m 2926\u001b[0m cache_loads[bytes_object] \u001b[39m=\u001b[39m result\n\u001b[1;32m 2927\u001b[0m \u001b[39mreturn\u001b[39;00m result\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/distributed/protocol/pickle.py:96\u001b[0m, in \u001b[0;36mloads\u001b[0;34m()\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[39mreturn\u001b[39;00m pickle\u001b[39m.\u001b[39mloads(x, buffers\u001b[39m=\u001b[39mbuffers)\n\u001b[1;32m 95\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 96\u001b[0m \u001b[39mreturn\u001b[39;00m pickle\u001b[39m.\u001b[39mloads(x)\n\u001b[1;32m 97\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m:\n\u001b[1;32m 98\u001b[0m logger\u001b[39m.\u001b[39minfo(\u001b[39m\"\u001b[39m\u001b[39mFailed to deserialize \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m, x[:\u001b[39m10000\u001b[39m], exc_info\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py:176\u001b[0m, in \u001b[0;36mhost_deserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Perform device-side deserialization tasks.\u001b[39;00m\n\u001b[1;32m 155\u001b[0m \n\u001b[1;32m 156\u001b[0m \u001b[39mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[39m:meta private:\u001b[39;00m\n\u001b[1;32m 171\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 172\u001b[0m frames \u001b[39m=\u001b[39m [\n\u001b[1;32m 173\u001b[0m cudf\u001b[39m.\u001b[39mcore\u001b[39m.\u001b[39mbuffer\u001b[39m.\u001b[39mas_buffer(f) \u001b[39mif\u001b[39;00m c \u001b[39melse\u001b[39;00m f\n\u001b[1;32m 174\u001b[0m \u001b[39mfor\u001b[39;00m c, f \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(header[\u001b[39m\"\u001b[39m\u001b[39mis-cuda\u001b[39m\u001b[39m\"\u001b[39m], \u001b[39mmap\u001b[39m(\u001b[39mmemoryview\u001b[39m, frames))\n\u001b[1;32m 175\u001b[0m ]\n\u001b[0;32m--> 176\u001b[0m obj \u001b[39m=\u001b[39m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mdevice_deserialize(header, frames)\n\u001b[1;32m 177\u001b[0m \u001b[39mreturn\u001b[39;00m obj\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/abc.py:130\u001b[0m, in \u001b[0;36mdevice_deserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 125\u001b[0m typ \u001b[39m=\u001b[39m 
pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 126\u001b[0m frames \u001b[39m=\u001b[39m [\n\u001b[1;32m 127\u001b[0m cudf\u001b[39m.\u001b[39mcore\u001b[39m.\u001b[39mbuffer\u001b[39m.\u001b[39mas_buffer(f) \u001b[39mif\u001b[39;00m c \u001b[39melse\u001b[39;00m \u001b[39mmemoryview\u001b[39m(f)\n\u001b[1;32m 128\u001b[0m \u001b[39mfor\u001b[39;00m c, f \u001b[39min\u001b[39;00m \u001b[39mzip\u001b[39m(header[\u001b[39m\"\u001b[39m\u001b[39mis-cuda\u001b[39m\u001b[39m\"\u001b[39m], frames)\n\u001b[1;32m 129\u001b[0m ]\n\u001b[0;32m--> 130\u001b[0m \u001b[39mreturn\u001b[39;00m typ\u001b[39m.\u001b[39mdeserialize(header, frames)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/dataframe.py:1019\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1016\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 1017\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize\u001b[39m(\u001b[39mcls\u001b[39m, header, frames):\n\u001b[1;32m 1018\u001b[0m index_nframes \u001b[39m=\u001b[39m header[\u001b[39m\"\u001b[39m\u001b[39mindex_frame_count\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m-> 1019\u001b[0m obj \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mdeserialize(\n\u001b[1;32m 1020\u001b[0m header, frames[header[\u001b[39m\"\u001b[39m\u001b[39mindex_frame_count\u001b[39m\u001b[39m\"\u001b[39m] :]\n\u001b[1;32m 1021\u001b[0m )\n\u001b[1;32m 1023\u001b[0m idx_typ \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m][\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 1024\u001b[0m index \u001b[39m=\u001b[39m idx_typ\u001b[39m.\u001b[39mdeserialize(header[\u001b[39m\"\u001b[39m\u001b[39mindex\u001b[39m\u001b[39m\"\u001b[39m], frames[:index_nframes])\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/frame.py:106\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 104\u001b[0m cls_deserialize \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 105\u001b[0m column_names \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mcolumn_names\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m--> 106\u001b[0m columns \u001b[39m=\u001b[39m deserialize_columns(header[\u001b[39m\"\u001b[39m\u001b[39mcolumns\u001b[39m\u001b[39m\"\u001b[39m], frames)\n\u001b[1;32m 107\u001b[0m \u001b[39mreturn\u001b[39;00m cls_deserialize\u001b[39m.\u001b[39m_from_data(\u001b[39mdict\u001b[39m(\u001b[39mzip\u001b[39m(column_names, columns)))\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py:2450\u001b[0m, in \u001b[0;36mdeserialize_columns\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2448\u001b[0m col_frame_count \u001b[39m=\u001b[39m meta[\u001b[39m\"\u001b[39m\u001b[39mframe_count\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 2449\u001b[0m col_typ \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(meta[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m-> 2450\u001b[0m colobj \u001b[39m=\u001b[39m col_typ\u001b[39m.\u001b[39mdeserialize(meta, frames[:col_frame_count])\n\u001b[1;32m 2451\u001b[0m 
columns\u001b[39m.\u001b[39mappend(colobj)\n\u001b[1;32m 2452\u001b[0m \u001b[39m# Advance frames\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py:1216\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1214\u001b[0m dtype \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mdtype\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[1;32m 1215\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mdata\u001b[39m\u001b[39m\"\u001b[39m \u001b[39min\u001b[39;00m header:\n\u001b[0;32m-> 1216\u001b[0m data, frames \u001b[39m=\u001b[39m unpack(header[\u001b[39m\"\u001b[39m\u001b[39mdata\u001b[39m\u001b[39m\"\u001b[39m], frames)\n\u001b[1;32m 1217\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1218\u001b[0m data \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/column/column.py:1204\u001b[0m, in \u001b[0;36munpack\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1202\u001b[0m count \u001b[39m=\u001b[39m header[\u001b[39m\"\u001b[39m\u001b[39mframe_count\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 1203\u001b[0m klass \u001b[39m=\u001b[39m pickle\u001b[39m.\u001b[39mloads(header[\u001b[39m\"\u001b[39m\u001b[39mtype-serialized\u001b[39m\u001b[39m\"\u001b[39m])\n\u001b[0;32m-> 1204\u001b[0m obj \u001b[39m=\u001b[39m klass\u001b[39m.\u001b[39mdeserialize(header, frames[:count])\n\u001b[1;32m 1205\u001b[0m \u001b[39mreturn\u001b[39;00m obj, frames[count:]\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py:574\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdeserialize\u001b[39m(\u001b[39mcls\u001b[39m, header: \u001b[39mdict\u001b[39m, frames: \u001b[39mlist\u001b[39m):\n\u001b[1;32m 569\u001b[0m \u001b[39m# TODO: because of the hack in `SpillableBuffer.serialize()` where\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 572\u001b[0m \u001b[39m# deserialize into `SpillableBufferSlice` when the frames hasn't been\u001b[39;00m\n\u001b[1;32m 573\u001b[0m \u001b[39m# copied.\u001b[39;00m\n\u001b[0;32m--> 574\u001b[0m \u001b[39mreturn\u001b[39;00m SpillableBuffer\u001b[39m.\u001b[39mdeserialize(header, frames)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/buffer.py:335\u001b[0m, in \u001b[0;36mdeserialize\u001b[0;34m()\u001b[0m\n\u001b[1;32m 332\u001b[0m \u001b[39mreturn\u001b[39;00m frame \u001b[39m# The frame is already deserialized\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(frame, \u001b[39m\"\u001b[39m\u001b[39m__cuda_array_interface__\u001b[39m\u001b[39m\"\u001b[39m):\n\u001b[0;32m--> 335\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_from_device_memory(frame)\n\u001b[1;32m 336\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m_from_host_memory(frame)\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py:235\u001b[0m, in \u001b[0;36m_from_device_memory\u001b[0;34m()\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Create a spillabe buffer from device memory.\u001b[39;00m\n\u001b[1;32m 219\u001b[0m \n\u001b[1;32m 
220\u001b[0m \u001b[39mNo data is being copied.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[39m Buffer representing the same device memory as `data`\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 234\u001b[0m ret \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m_from_device_memory(data)\n\u001b[0;32m--> 235\u001b[0m ret\u001b[39m.\u001b[39m_finalize_init(ptr_desc\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mtype\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39mgpu\u001b[39m\u001b[39m\"\u001b[39m}, exposed\u001b[39m=\u001b[39mexposed)\n\u001b[1;32m 236\u001b[0m \u001b[39mreturn\u001b[39;00m ret\n", + "File \u001b[0;32m~/miniforge/envs/cugraph_0411/lib/python3.10/site-packages/cudf/core/buffer/spillable_buffer.py:206\u001b[0m, in \u001b[0;36m_finalize_init\u001b[0;34m()\u001b[0m\n\u001b[1;32m 204\u001b[0m manager \u001b[39m=\u001b[39m get_global_manager()\n\u001b[1;32m 205\u001b[0m \u001b[39mif\u001b[39;00m manager \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 206\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 207\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcannot create \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m\u001b[39m}\u001b[39;00m\u001b[39m without \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 208\u001b[0m \u001b[39m\"\u001b[39m\u001b[39ma global spill manager\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 209\u001b[0m )\n\u001b[1;32m 211\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_manager \u001b[39m=\u001b[39m manager\n\u001b[1;32m 212\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_manager\u001b[39m.\u001b[39madd(\u001b[39mself\u001b[39m)\n", "\u001b[0;31mValueError\u001b[0m: cannot create without a global spill manager" ] } diff --git a/notebooks/demo/nx_cugraph_demo.ipynb b/notebooks/demo/nx_cugraph_demo.ipynb index 6e50370ed80..f1ce80aa188 100644 --- a/notebooks/demo/nx_cugraph_demo.ipynb +++ b/notebooks/demo/nx_cugraph_demo.ipynb @@ -20,7 +20,7 @@ "Using `nx-cugraph` with this notebook requires the following: \n", "- NVIDIA GPU, Pascal architecture or later\n", "- CUDA 11.2, 11.4, 11.5, 11.8, or 12.0\n", - "- Python versions 3.9, 3.10, or 3.11\n", + "- Python versions 3.10, 3.11, or 3.12\n", "- NetworkX >= version 3.2\n", " - _NetworkX 3.0 supports dispatching and is compatible with `nx-cugraph`, but this notebook will demonstrate features added in 3.2_\n", " - At the time of this writing, NetworkX 3.2 is only available from source and can be installed by following the [development version install instructions](https://github.com/networkx/networkx/blob/main/INSTALL.rst#install-the-development-version).\n", diff --git a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml index ea30b652286..42cbcab5008 100644 --- a/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml +++ b/python/cugraph-dgl/conda/cugraph_dgl_dev_cuda-118.yaml @@ -9,17 +9,17 @@ channels: - conda-forge - nvidia dependencies: -- cugraph==24.10.*,>=0.0.0a0 +- cugraph==24.12.*,>=0.0.0a0 - dgl>=1.1.0.cu* - pandas - pre-commit -- pylibcugraphops==24.10.*,>=0.0.0a0 +- pylibcugraphops==24.12.*,>=0.0.0a0 - pytest - pytest-benchmark - pytest-cov - pytest-xdist - pytorch-cuda==11.8 -- pytorch>=2.0 +- pytorch>=2.3,<2.4.0a0 - scipy - tensordict>=0.1.2 name: cugraph_dgl_dev_cuda-118 diff 
--git a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py index 21b70b05f3a..4f36353cb18 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/dataloader.py @@ -140,6 +140,10 @@ def __init__( self.__graph = graph self.__device = device + @property + def _batch_size(self): + return self.__batch_size + @property def dataset( self, diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py index 1a35c3ea027..ecc51006995 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/neighbor_sampler.py @@ -18,7 +18,7 @@ from typing import Sequence, Optional, Union, List, Tuple, Iterator -from cugraph.gnn import UniformNeighborSampler, DistSampleWriter +from cugraph.gnn import UniformNeighborSampler, BiasedNeighborSampler, DistSampleWriter from cugraph.utilities.utils import import_optional import cugraph_dgl @@ -93,7 +93,6 @@ def __init__( If provided, the probability of each neighbor being sampled is proportional to the edge feature with the given name. Mutually exclusive with mask. - Currently unsupported. mask: str Optional. If provided, only neighbors where the edge mask @@ -133,10 +132,6 @@ def __init__( raise NotImplementedError( "Edge masking is currently unsupported by cuGraph-DGL" ) - if prob: - raise NotImplementedError( - "Edge masking is currently unsupported by cuGraph-DGL" - ) if prefetch_edge_feats: warnings.warn("'prefetch_edge_feats' is ignored by cuGraph-DGL") if prefetch_node_feats: @@ -146,6 +141,8 @@ def __init__( if fused: warnings.warn("'fused' is ignored by cuGraph-DGL") + self.__prob_attr = prob + self.fanouts = fanouts_per_layer reverse_fanouts = fanouts_per_layer.copy() reverse_fanouts.reverse() @@ -180,8 +177,14 @@ def sample( format=kwargs.pop("format", "parquet"), ) - ds = UniformNeighborSampler( - g._graph(self.edge_dir), + sampling_clx = ( + UniformNeighborSampler + if self.__prob_attr is None + else BiasedNeighborSampler + ) + + ds = sampling_clx( + g._graph(self.edge_dir, prob_attr=self.__prob_attr), writer, compression="CSR", fanout=self._reversed_fanout_vals, @@ -194,10 +197,8 @@ def sample( if g.is_homogeneous: indices = torch.concat(list(indices)) - ds.sample_from_nodes(indices, batch_size=batch_size) - return HomogeneousSampleReader( - ds.get_reader(), self.output_format, self.edge_dir - ) + reader = ds.sample_from_nodes(indices.long(), batch_size=batch_size) + return HomogeneousSampleReader(reader, self.output_format, self.edge_dir) raise ValueError( "Sampling heterogeneous graphs is currently" diff --git a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py index 731ec1b8d6f..7ea608e7e53 100644 --- a/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py +++ b/python/cugraph-dgl/cugraph_dgl/dataloading/sampler.py @@ -20,7 +20,6 @@ create_homogeneous_sampled_graphs_from_tensors_csc, ) -from cugraph.gnn import DistSampleReader from cugraph.utilities.utils import import_optional @@ -33,14 +32,18 @@ class SampleReader: """ Iterator that processes results from the cuGraph distributed sampler. 
""" - def __init__(self, base_reader: DistSampleReader, output_format: str = "dgl.Block"): + def __init__( + self, + base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]], + output_format: str = "dgl.Block", + ): """ Constructs a new SampleReader. Parameters ---------- - base_reader: DistSampleReader - The reader responsible for loading saved samples produced by + base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] + The iterator responsible for loading saved samples produced by the cuGraph distributed sampler. """ self.__output_format = output_format @@ -83,7 +86,7 @@ class HomogeneousSampleReader(SampleReader): def __init__( self, - base_reader: DistSampleReader, + base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]], output_format: str = "dgl.Block", edge_dir="in", ): @@ -92,7 +95,7 @@ def __init__( Parameters ---------- - base_reader: DistSampleReader + base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] The reader responsible for loading saved samples produced by the cuGraph distributed sampler. output_format: str diff --git a/python/cugraph-dgl/cugraph_dgl/graph.py b/python/cugraph-dgl/cugraph_dgl/graph.py index 2eba13c6958..88b93656fa8 100644 --- a/python/cugraph-dgl/cugraph_dgl/graph.py +++ b/python/cugraph-dgl/cugraph_dgl/graph.py @@ -29,6 +29,7 @@ HeteroNodeDataView, HeteroEdgeView, HeteroEdgeDataView, + EmbeddingView, ) @@ -311,7 +312,7 @@ def add_edges( self.__graph = None self.__vertex_offsets = None - def num_nodes(self, ntype: str = None) -> int: + def num_nodes(self, ntype: Optional[str] = None) -> int: """ Returns the number of nodes of ntype, or if ntype is not provided, the total number of nodes in the graph. @@ -321,7 +322,7 @@ def num_nodes(self, ntype: str = None) -> int: return self.__num_nodes_dict[ntype] - def number_of_nodes(self, ntype: str = None) -> int: + def number_of_nodes(self, ntype: Optional[str] = None) -> int: """ Alias for num_nodes. """ @@ -380,7 +381,7 @@ def _vertex_offsets(self) -> Dict[str, int]: return dict(self.__vertex_offsets) - def __get_edgelist(self) -> Dict[str, "torch.Tensor"]: + def __get_edgelist(self, prob_attr=None) -> Dict[str, "torch.Tensor"]: """ This function always returns src/dst labels with respect to the out direction. 
@@ -430,63 +431,71 @@ def __get_edgelist(self) -> Dict[str, "torch.Tensor"]: ) ) + num_edges_t = torch.tensor( + [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" + ) + if self.is_multi_gpu: rank = torch.distributed.get_rank() world_size = torch.distributed.get_world_size() - num_edges_t = torch.tensor( - [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" - ) num_edges_all_t = torch.empty( world_size, num_edges_t.numel(), dtype=torch.int64, device="cuda" ) torch.distributed.all_gather_into_tensor(num_edges_all_t, num_edges_t) - if rank > 0: - start_offsets = num_edges_all_t[:rank].T.sum(axis=1) - edge_id_array = torch.concat( + start_offsets = num_edges_all_t[:rank].T.sum(axis=1) + + else: + rank = 0 + start_offsets = torch.zeros( + (len(sorted_keys),), dtype=torch.int64, device="cuda" + ) + num_edges_all_t = num_edges_t.reshape((1, num_edges_t.numel())) + + # Use pinned memory here for fast access to CPU/WG storage + edge_id_array_per_type = [ + torch.arange( + start_offsets[i], + start_offsets[i] + num_edges_all_t[rank][i], + dtype=torch.int64, + device="cpu", + ).pin_memory() + for i in range(len(sorted_keys)) + ] + + # Retrieve the weights from the appropriate feature(s) + # DGL implicitly requires all edge types use the same + # feature name. + if prob_attr is None: + weights = None + else: + if len(sorted_keys) > 1: + weights = torch.concat( [ - torch.arange( - start_offsets[i], - start_offsets[i] + num_edges_all_t[rank][i], - dtype=torch.int64, - device="cuda", - ) - for i in range(len(sorted_keys)) + self.edata[prob_attr][sorted_keys[i]][ix] + for i, ix in enumerate(edge_id_array_per_type) ] ) else: - edge_id_array = torch.concat( - [ - torch.arange( - self.__edge_indices[et].shape[1], - dtype=torch.int64, - device="cuda", - ) - for et in sorted_keys - ] - ) + weights = self.edata[prob_attr][edge_id_array_per_type[0]] - else: - # single GPU - edge_id_array = torch.concat( - [ - torch.arange( - self.__edge_indices[et].shape[1], - dtype=torch.int64, - device="cuda", - ) - for et in sorted_keys - ] - ) + # Safe to move this to cuda because the consumer will always + # move it to cuda if it isn't already there. 
+ edge_id_array = torch.concat(edge_id_array_per_type).cuda() - return { + edgelist_dict = { "src": edge_index[0], "dst": edge_index[1], "etp": edge_type_array, "eid": edge_id_array, } + if weights is not None: + edgelist_dict["wgt"] = weights + + return edgelist_dict + @property def is_homogeneous(self): return len(self.__num_edges_dict) <= 1 and len(self.__num_nodes_dict) <= 1 @@ -507,7 +516,9 @@ def _resource_handle(self): return self.__handle def _graph( - self, direction: str + self, + direction: str, + prob_attr: Optional[str] = None, ) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: """ Gets the pylibcugraph Graph object with edges pointing in the given direction @@ -521,12 +532,16 @@ def _graph( is_multigraph=True, is_symmetric=False ) - if self.__graph is not None and self.__graph[1] != direction: - self.__graph = None + if self.__graph is not None: + if ( + self.__graph["direction"] != direction + or self.__graph["prob_attr"] != prob_attr + ): + self.__graph = None if self.__graph is None: src_col, dst_col = ("src", "dst") if direction == "out" else ("dst", "src") - edgelist_dict = self.__get_edgelist() + edgelist_dict = self.__get_edgelist(prob_attr=prob_attr) if self.is_multi_gpu: rank = torch.distributed.get_rank() @@ -535,40 +550,42 @@ def _graph( vertices_array = cupy.arange(self.num_nodes(), dtype="int64") vertices_array = cupy.array_split(vertices_array, world_size)[rank] - self.__graph = ( - pylibcugraph.MGGraph( - self._resource_handle, - graph_properties, - [cupy.asarray(edgelist_dict[src_col]).astype("int64")], - [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], - vertices_array=[vertices_array], - edge_id_array=[cupy.asarray(edgelist_dict["eid"])], - edge_type_array=[cupy.asarray(edgelist_dict["etp"])], - ), - direction, + graph = pylibcugraph.MGGraph( + self._resource_handle, + graph_properties, + [cupy.asarray(edgelist_dict[src_col]).astype("int64")], + [cupy.asarray(edgelist_dict[dst_col]).astype("int64")], + vertices_array=[vertices_array], + edge_id_array=[cupy.asarray(edgelist_dict["eid"])], + edge_type_array=[cupy.asarray(edgelist_dict["etp"])], + weight_array=[cupy.asarray(edgelist_dict["wgt"])] + if "wgt" in edgelist_dict + else None, ) else: - self.__graph = ( - pylibcugraph.SGGraph( - self._resource_handle, - graph_properties, - cupy.asarray(edgelist_dict[src_col]).astype("int64"), - cupy.asarray(edgelist_dict[dst_col]).astype("int64"), - vertices_array=cupy.arange(self.num_nodes(), dtype="int64"), - edge_id_array=cupy.asarray(edgelist_dict["eid"]), - edge_type_array=cupy.asarray(edgelist_dict["etp"]), - ), - direction, + graph = pylibcugraph.SGGraph( + self._resource_handle, + graph_properties, + cupy.asarray(edgelist_dict[src_col]).astype("int64"), + cupy.asarray(edgelist_dict[dst_col]).astype("int64"), + vertices_array=cupy.arange(self.num_nodes(), dtype="int64"), + edge_id_array=cupy.asarray(edgelist_dict["eid"]), + edge_type_array=cupy.asarray(edgelist_dict["etp"]), + weight_array=cupy.asarray(edgelist_dict["wgt"]) + if "wgt" in edgelist_dict + else None, ) - return self.__graph[0] + self.__graph = {"graph": graph, "direction": direction, "prob_attr": prob_attr} + + return self.__graph["graph"] def _has_n_emb(self, ntype: str, emb_name: str) -> bool: return (ntype, emb_name) in self.__ndata_storage def _get_n_emb( - self, ntype: str, emb_name: str, u: Union[str, TensorType] - ) -> "torch.Tensor": + self, ntype: Union[str, None], emb_name: str, u: Union[str, TensorType] + ) -> Union["torch.Tensor", "EmbeddingView"]: """ Gets the embedding of 
a single node type. Unlike DGL, this function takes the string node @@ -583,11 +600,11 @@ def _get_n_emb( u: Union[str, TensorType] Nodes to get the representation of, or ALL to get the representation of all nodes of - the given type. + the given type (returns embedding view). Returns ------- - torch.Tensor + Union[torch.Tensor, cugraph_dgl.view.EmbeddingView] The embedding of the given node type with the given embedding name. """ @@ -598,7 +615,9 @@ def _get_n_emb( raise ValueError("Must provide the node type for a heterogeneous graph") if dgl.base.is_all(u): - u = torch.arange(self.num_nodes(ntype), dtype=self.idtype, device="cpu") + return EmbeddingView( + self.__ndata_storage[ntype, emb_name], self.num_nodes(ntype) + ) try: return self.__ndata_storage[ntype, emb_name].fetch( @@ -644,7 +663,9 @@ def _get_e_emb( etype = self.to_canonical_etype(etype) if dgl.base.is_all(u): - u = torch.arange(self.num_edges(etype), dtype=self.idtype, device="cpu") + return EmbeddingView( + self.__edata_storage[etype, emb_name], self.num_edges(etype) + ) try: return self.__edata_storage[etype, emb_name].fetch( diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py index d2460f814c9..fcd5a26aee6 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py @@ -129,7 +129,7 @@ def __init__( if csrc_ids is not None: if csrc_ids.numel() != self._num_src_nodes + 1: raise RuntimeError( - f"Size mismatch for 'csrc_ids': expected ({size[0]+1},), " + f"Size mismatch for 'csrc_ids': expected ({size[0] + 1},), " f"but got {tuple(csrc_ids.size())}" ) csrc_ids = csrc_ids.contiguous() @@ -137,7 +137,7 @@ def __init__( if cdst_ids is not None: if cdst_ids.numel() != self._num_dst_nodes + 1: raise RuntimeError( - f"Size mismatch for 'cdst_ids': expected ({size[1]+1},), " + f"Size mismatch for 'cdst_ids': expected ({size[1] + 1},), " f"but got {tuple(cdst_ids.size())}" ) cdst_ids = cdst_ids.contiguous() diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py index ef47875463d..419ec7790a9 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
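With the change above, asking for ALL rows via ndata/edata no longer materializes every embedding; it returns a lazy EmbeddingView over the feature storage. A minimal usage sketch, assuming `g` is an already-populated homogeneous cugraph_dgl.Graph with an embedding named "feat" (the names here are illustrative, not new API):

    import torch

    view = g.ndata["feat"]  # now an EmbeddingView; no rows are fetched yet
    print(view.shape)  # (num_nodes, dim); the leading dim is the node count

    rows = view[torch.tensor([0, 1, 2])]  # fetches only these rows, on GPU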
+ import cugraph_dgl.dataloading import pytest @@ -48,9 +49,12 @@ def test_dataloader_basic_homogeneous(): assert len(out_t) <= 2 -def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): +def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1, prob_attr=None): # Single fanout to match cugraph - sampler = dgl.dataloading.NeighborSampler(fanouts) + sampler = dgl.dataloading.NeighborSampler( + fanouts, + prob=prob_attr, + ) dataloader = dgl.dataloading.DataLoader( g, train_nid, @@ -71,8 +75,13 @@ def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): return dgl_output -def sample_cugraph_dgl_graphs(cugraph_g, train_nid, fanouts, batch_size=1): - sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) +def sample_cugraph_dgl_graphs( + cugraph_g, train_nid, fanouts, batch_size=1, prob_attr=None +): + sampler = cugraph_dgl.dataloading.NeighborSampler( + fanouts, + prob=prob_attr, + ) dataloader = cugraph_dgl.dataloading.FutureDataLoader( cugraph_g, @@ -126,3 +135,41 @@ def test_same_homogeneousgraph_results(ix, batch_size): dgl_output[0]["blocks"][0].num_edges() == cugraph_output[0]["blocks"][0].num_edges() ) + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +def test_dataloader_biased_homogeneous(): + src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) + dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) + wgt = torch.tensor([1, 1, 2, 0, 0, 0, 2, 1], dtype=torch.float32) + + train_nid = torch.tensor([0, 1]) + # Create a simple homogeneous graph. + dgl_g = dgl.graph((src, dst)) + dgl_g.edata["wgt"] = wgt + + cugraph_g = cugraph_dgl.Graph(is_multi_gpu=False) + cugraph_g.add_nodes(9) + cugraph_g.add_edges(u=src, v=dst, data={"wgt": wgt}) + + dgl_output = sample_dgl_graphs(dgl_g, train_nid, [4], batch_size=2, prob_attr="wgt") + cugraph_output = sample_cugraph_dgl_graphs( + cugraph_g, train_nid, [4], batch_size=2, prob_attr="wgt" + ) + + cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() + dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() + + np.testing.assert_array_equal( + np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) + ) + assert ( + dgl_output[0]["blocks"][0].num_dst_nodes() + == cugraph_output[0]["blocks"][0].num_dst_nodes() + ) + assert ( + dgl_output[0]["blocks"][0].num_edges() + == cugraph_output[0]["blocks"][0].num_edges() + ) + assert 5 == cugraph_output[0]["blocks"][0].num_edges() diff --git a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py index b32233f16a6..061f4fa2077 100644 --- a/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py +++ b/python/cugraph-dgl/cugraph_dgl/tests/dataloading/test_dataloader_mg.py @@ -82,9 +82,18 @@ def test_dataloader_basic_homogeneous(): ) -def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): +def sample_dgl_graphs( + g, + train_nid, + fanouts, + batch_size=1, + prob_attr=None, +): # Single fanout to match cugraph - sampler = dgl.dataloading.NeighborSampler(fanouts) + sampler = dgl.dataloading.NeighborSampler( + fanouts, + prob=prob_attr, + ) dataloader = dgl.dataloading.DataLoader( g, train_nid, @@ -105,8 +114,17 @@ def sample_dgl_graphs(g, train_nid, fanouts, batch_size=1): return dgl_output -def sample_cugraph_dgl_graphs(cugraph_g, train_nid, fanouts, batch_size=1): - sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts) +def 
sample_cugraph_dgl_graphs( + cugraph_g, + train_nid, + fanouts, + batch_size=1, + prob_attr=None, +): + sampler = cugraph_dgl.dataloading.NeighborSampler( + fanouts, + prob=prob_attr, + ) dataloader = cugraph_dgl.dataloading.FutureDataLoader( cugraph_g, @@ -179,3 +197,58 @@ def test_same_homogeneousgraph_results_mg(ix, batch_size): args=(world_size, uid, ix, batch_size), nprocs=world_size, ) + + +def run_test_dataloader_biased_homogeneous(rank, world_size, uid): + init_pytorch_worker(rank, world_size, uid, True) + + src = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]) + (rank * 9) + dst = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]) + (rank * 9) + wgt = torch.tensor( + [0.1, 0.1, 0.2, 0, 0, 0, 0.2, 0.1] * world_size, dtype=torch.float32 + ) + + train_nid = torch.tensor([0, 1]) + (rank * 9) + # Create a simple homogeneous graph on each rank. + dgl_g = dgl.graph((src, dst)) + dgl_g.edata["wgt"] = wgt[:8] + + cugraph_g = cugraph_dgl.Graph(is_multi_gpu=True) + cugraph_g.add_nodes(9 * world_size) + cugraph_g.add_edges(u=src, v=dst, data={"wgt": wgt}) + + dgl_output = sample_dgl_graphs(dgl_g, train_nid, [4], batch_size=2, prob_attr="wgt") + cugraph_output = sample_cugraph_dgl_graphs( + cugraph_g, train_nid, [4], batch_size=2, prob_attr="wgt" + ) + + cugraph_output_nodes = cugraph_output[0]["output_nodes"].cpu().numpy() + dgl_output_nodes = dgl_output[0]["output_nodes"].cpu().numpy() + + np.testing.assert_array_equal( + np.sort(cugraph_output_nodes), np.sort(dgl_output_nodes) + ) + assert ( + dgl_output[0]["blocks"][0].num_dst_nodes() + == cugraph_output[0]["blocks"][0].num_dst_nodes() + ) + assert ( + dgl_output[0]["blocks"][0].num_edges() + == cugraph_output[0]["blocks"][0].num_edges() + ) + + assert 5 == cugraph_output[0]["blocks"][0].num_edges() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif(isinstance(dgl, MissingModule), reason="dgl not available") +def test_dataloader_biased_homogeneous_mg(): + uid = cugraph_comms_create_unique_id() + # Run the test with all available GPUs + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_dataloader_biased_homogeneous, + args=(world_size, uid), + nprocs=world_size, + ) diff --git a/python/cugraph-dgl/cugraph_dgl/view.py b/python/cugraph-dgl/cugraph_dgl/view.py index dbc53e73b6a..4de9406be07 100644 --- a/python/cugraph-dgl/cugraph_dgl/view.py +++ b/python/cugraph-dgl/cugraph_dgl/view.py @@ -12,6 +12,8 @@ # limitations under the License.
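The two tests above exercise the new `prob_attr` plumbing end to end. A minimal single-GPU usage sketch, mirroring the SG test and using only API that this diff adds:

    import torch
    import cugraph_dgl
    import cugraph_dgl.dataloading

    g = cugraph_dgl.Graph(is_multi_gpu=False)
    g.add_nodes(9)
    g.add_edges(
        u=torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]),
        v=torch.tensor([0, 0, 0, 0, 1, 1, 1, 1]),
        data={"wgt": torch.tensor([1, 1, 2, 0, 0, 0, 2, 1], dtype=torch.float32)},
    )

    # Zero-weight edges are never drawn; the rest are sampled in proportion
    # to "wgt", matching dgl.dataloading.NeighborSampler(fanouts, prob=...).
    sampler = cugraph_dgl.dataloading.NeighborSampler([4], prob="wgt")
    loader = cugraph_dgl.dataloading.FutureDataLoader(
        g, torch.tensor([0, 1]), sampler, batch_size=2
    )
    for input_nodes, output_nodes, blocks in loader:
        pass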
+import warnings + from collections import defaultdict from collections.abc import MutableMapping from typing import Union, Dict, List, Tuple @@ -20,11 +22,45 @@ import cugraph_dgl from cugraph_dgl.typing import TensorType +from cugraph_dgl.utils.cugraph_conversion_utils import _cast_to_torch_tensor torch = import_optional("torch") dgl = import_optional("dgl") +class EmbeddingView: + def __init__(self, storage: "dgl.storages.base.FeatureStorage", ld: int): + self.__ld = ld + self.__storage = storage + + def __getitem__(self, u: TensorType) -> "torch.Tensor": + u = _cast_to_torch_tensor(u) + try: + return self.__storage.fetch( + u, + "cuda", + ) + except RuntimeError as ex: + warnings.warn( + "Got error accessing data, trying again with index on device: " + + str(ex) + ) + return self.__storage.fetch( + u.cuda(), + "cuda", + ) + + @property + def shape(self) -> "torch.Size": + try: + f = self.__storage.fetch(torch.tensor([0]), "cpu") + except RuntimeError: + f = self.__storage.fetch(torch.tensor([0], device="cuda"), "cuda") + sz = [s for s in f.shape] + sz[0] = self.__ld + return torch.Size(tuple(sz)) + + class HeteroEdgeDataView(MutableMapping): """ Duck-typed version of DGL's HeteroEdgeDataView. diff --git a/python/cugraph-dgl/examples/graphsage/node-classification-dask.py b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py new file mode 100644 index 00000000000..0481f9566bc --- /dev/null +++ b/python/cugraph-dgl/examples/graphsage/node-classification-dask.py @@ -0,0 +1,272 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
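EmbeddingView.shape above learns the row layout by fetching a single row and then swapping in the stored row count as the leading dimension. The same pattern in isolation, with a plain tensor standing in for the wrapped FeatureStorage (toy code, not part of the library):

    import torch

    class TensorBackedView:
        """Toy stand-in: a view over `table` that reports `num_rows` rows."""

        def __init__(self, table: torch.Tensor, num_rows: int):
            self._table = table
            self._num_rows = num_rows

        @property
        def shape(self) -> torch.Size:
            probe = self._table[torch.tensor([0])]  # fetch one row to learn dims
            sz = list(probe.shape)
            sz[0] = self._num_rows  # report the logical leading dimension
            return torch.Size(sz)

    view = TensorBackedView(torch.zeros(4, 16), num_rows=100)
    assert view.shape == torch.Size([100, 16])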
+ + +# Example modified from: +# https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/node_classification.py + +# Ignore Warning +import warnings +import time +import cugraph_dgl +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchmetrics.functional as MF +import dgl +import dgl.nn as dglnn +from dgl.data import AsNodePredDataset +from dgl.dataloading import ( + DataLoader, + NeighborSampler, + MultiLayerFullNeighborSampler, +) +from ogb.nodeproppred import DglNodePropPredDataset +import tqdm +import argparse + +warnings.filterwarnings("ignore") + + +def set_allocators(): + import rmm + import cudf + import cupy + from rmm.allocators.torch import rmm_torch_allocator + from rmm.allocators.cupy import rmm_cupy_allocator + + mr = rmm.mr.CudaAsyncMemoryResource() + rmm.mr.set_current_device_resource(mr) + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + cupy.cuda.set_allocator(rmm_cupy_allocator) + cudf.set_option("spill", True) + + +class SAGE(nn.Module): + def __init__(self, in_size, hid_size, out_size): + super().__init__() + self.layers = nn.ModuleList() + # three-layer GraphSAGE-mean + self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) + self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean")) + self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) + self.dropout = nn.Dropout(0.5) + self.hid_size = hid_size + self.out_size = out_size + + def forward(self, blocks, x): + h = x + for l_id, (layer, block) in enumerate(zip(self.layers, blocks)): + h = layer(block, h) + if l_id != len(self.layers) - 1: + h = F.relu(h) + h = self.dropout(h) + return h + + def inference(self, g, device, batch_size): + """Conduct layer-wise inference to get all the node embeddings.""" + all_node_ids = torch.arange(0, g.num_nodes()).to(device) + feat = g.get_node_storage(key="feat", ntype="_N").fetch( + all_node_ids, device=device + ) + + sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) + dataloader = DataLoader( + g, + torch.arange(g.num_nodes()).to(g.device), + sampler, + device=device, + batch_size=batch_size, + shuffle=False, + drop_last=False, + num_workers=0, + ) + buffer_device = torch.device("cpu") + pin_memory = buffer_device != device + + for l_id, layer in enumerate(self.layers): + y = torch.empty( + g.num_nodes(), + self.hid_size if l_id != len(self.layers) - 1 else self.out_size, + device=buffer_device, + pin_memory=pin_memory, + ) + feat = feat.to(device) + for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): + x = feat[input_nodes] + h = layer(blocks[0], x) # len(blocks) = 1 + if l_id != len(self.layers) - 1: + h = F.relu(h) + h = self.dropout(h) + # by design, our output nodes are contiguous + y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device) + feat = y + return y + + +def evaluate(model, graph, dataloader): + model.eval() + ys = [] + y_hats = [] + for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader): + with torch.no_grad(): + if isinstance(graph.ndata["feat"], dict): + x = graph.ndata["feat"]["_N"][input_nodes] + label = graph.ndata["label"]["_N"][output_nodes] + else: + x = graph.ndata["feat"][input_nodes] + label = graph.ndata["label"][output_nodes] + ys.append(label) + y_hats.append(model(blocks, x)) + num_classes = y_hats[0].shape[1] + return MF.accuracy( + torch.cat(y_hats), + torch.cat(ys), + task="multiclass", + num_classes=num_classes, + ) + + +def layerwise_infer(device, graph, nid, model, batch_size): + model.eval() + with 
torch.no_grad(): + pred = model.inference(graph, device, batch_size) # pred in buffer_device + pred = pred[nid] + label = graph.ndata["label"] + if isinstance(label, dict): + label = label["_N"] + label = label[nid].to(device).to(pred.device) + num_classes = pred.shape[1] + return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) + + +def train(args, device, g, dataset, model): + # create sampler & dataloader + train_idx = dataset.train_idx.to(device) + val_idx = dataset.val_idx.to(device) + + use_uva = args.mode == "mixed" + batch_size = 1024 + fanouts = [5, 10, 15] + sampler = NeighborSampler(fanouts) + train_dataloader = DataLoader( + g, + train_idx, + sampler, + device=device, + batch_size=batch_size, + shuffle=True, + drop_last=False, + num_workers=0, + use_uva=use_uva, + ) + val_dataloader = DataLoader( + g, + val_idx, + sampler, + device=device, + batch_size=batch_size, + shuffle=True, + drop_last=False, + num_workers=0, + use_uva=use_uva, + ) + + opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) + + for epoch in range(10): + model.train() + total_loss = 0 + st = time.time() + for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): + if isinstance(g.ndata["feat"], dict): + x = g.ndata["feat"]["_N"][input_nodes] + y = g.ndata["label"]["_N"][output_nodes] + else: + x = g.ndata["feat"][input_nodes] + y = g.ndata["label"][output_nodes] + + y_hat = model(blocks, x) + loss = F.cross_entropy(y_hat, y) + opt.zero_grad() + loss.backward() + opt.step() + total_loss += loss.item() + + et = time.time() + + print( + f"Time taken for epoch {epoch} with batch_size {batch_size} = {et - st} s" + ) + acc = evaluate(model, g, val_dataloader) + print( + "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format( + epoch, total_loss / (it + 1), acc.item() + ) + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--mode", + default="gpu_cugraph_dgl", + choices=["cpu", "mixed", "gpu_dgl", "gpu_cugraph_dgl"], + help="Training mode." 
+ " 'cpu' for CPU training," + " 'mixed' for CPU-GPU mixed training, " + " 'gpu_dgl' for pure-GPU training, " + " 'gpu_cugraph_dgl' for pure-GPU training.", + ) + args = parser.parse_args() + if not torch.cuda.is_available(): + args.mode = "cpu" + if args.mode == "gpu_cugraph_dgl": + set_allocators() + print(f"Training in {args.mode} mode.") + + # load and preprocess dataset + print("Loading data") + dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) + g = dataset[0] + g = dgl.add_self_loop(g) + if args.mode == "gpu_cugraph_dgl": + g = cugraph_dgl.cugraph_storage_from_heterograph(g.to("cuda")) + del dataset.g + + else: + g = g.to("cuda" if args.mode == "gpu_dgl" else "cpu") + device = torch.device( + "cpu" if args.mode == "cpu" or args.mode == "mixed" else "cuda" + ) + + # create GraphSAGE model + feat_shape = ( + g.get_node_storage(key="feat", ntype="_N") + .fetch(torch.LongTensor([0]).to(device), device=device) + .shape[1] + ) + print(feat_shape) + # no ndata in cugraph storage object + in_size = feat_shape + out_size = dataset.num_classes + model = SAGE(in_size, 256, out_size).to(device) + + # model training + print("Training...") + train(args, device, g, dataset, model) + + # test the model + print("Testing...") + acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096) + print("Test Accuracy {:.4f}".format(acc.item())) diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py index 539fd86d136..56ac41c09b4 100644 --- a/python/cugraph-dgl/examples/graphsage/node-classification.py +++ b/python/cugraph-dgl/examples/graphsage/node-classification.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,8 +17,10 @@ # Ignore Warning import warnings +import tempfile import time import cugraph_dgl +import cugraph_dgl.dataloading import torch import torch.nn as nn import torch.nn.functional as F @@ -76,14 +78,17 @@ def forward(self, blocks, x): def inference(self, g, device, batch_size): """Conduct layer-wise inference to get all the node embeddings.""" all_node_ids = torch.arange(0, g.num_nodes()).to(device) - feat = g.get_node_storage(key="feat", ntype="_N").fetch( - all_node_ids, device=device - ) + feat = g.ndata["feat"][all_node_ids].to(device) - sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) - dataloader = DataLoader( + if isinstance(g, cugraph_dgl.Graph): + sampler = cugraph_dgl.dataloading.NeighborSampler([-1]) + loader_cls = cugraph_dgl.dataloading.FutureDataLoader + else: + sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) + loader_cls = DataLoader + dataloader = loader_cls( g, - torch.arange(g.num_nodes()).to(g.device), + torch.arange(g.num_nodes()).to(device), sampler, device=device, batch_size=batch_size, @@ -150,7 +155,7 @@ def layerwise_infer(device, graph, nid, model, batch_size): return MF.accuracy(pred, label, task="multiclass", num_classes=num_classes) -def train(args, device, g, dataset, model): +def train(args, device, g, dataset, model, directory): # create sampler & dataloader train_idx = dataset.train_idx.to(device) val_idx = dataset.val_idx.to(device) @@ -158,8 +163,13 @@ def train(args, device, g, dataset, model): use_uva = args.mode == "mixed" batch_size = 1024 fanouts = [5, 10, 15] - sampler = NeighborSampler(fanouts) - train_dataloader = DataLoader( + if isinstance(g, cugraph_dgl.Graph): + sampler = cugraph_dgl.dataloading.NeighborSampler(fanouts, directory=directory) + loader_cls = cugraph_dgl.dataloading.FutureDataLoader + else: + sampler = NeighborSampler(fanouts) + loader_cls = DataLoader + train_dataloader = loader_cls( g, train_idx, sampler, @@ -170,7 +180,7 @@ def train(args, device, g, dataset, model): num_workers=0, use_uva=use_uva, ) - val_dataloader = DataLoader( + val_dataloader = loader_cls( g, val_idx, sampler, @@ -195,6 +205,7 @@ def train(args, device, g, dataset, model): else: x = g.ndata["feat"][input_nodes] y = g.ndata["label"][output_nodes] + y_hat = model(blocks, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() @@ -204,7 +215,9 @@ def train(args, device, g, dataset, model): et = time.time() - print(f"Time taken for epoch {epoch} with batch_size {batch_size} = {et-st} s") + print( + f"Time taken for epoch {epoch} with batch_size {batch_size} = {et - st} s" + ) acc = evaluate(model, g, val_dataloader) print( "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format( @@ -225,6 +238,8 @@ def train(args, device, g, dataset, model): " 'gpu_dgl' for pure-GPU training, " " 'gpu_cugraph_dgl' for pure-GPU training.", ) + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--tempdir_root", type=str, default=None) args = parser.parse_args() if not torch.cuda.is_available(): args.mode = "cpu" @@ -234,11 +249,13 @@ def train(args, device, g, dataset, model): # load and preprocess dataset print("Loading data") - dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) + dataset = AsNodePredDataset( + DglNodePropPredDataset("ogbn-products", root=args.dataset_root) + ) g = dataset[0] g = dgl.add_self_loop(g) if args.mode == "gpu_cugraph_dgl": - g = cugraph_dgl.cugraph_storage_from_heterograph(g.to("cuda")) + g = 
cugraph_dgl.cugraph_dgl_graph_from_heterograph(g.to("cuda")) del dataset.g else: @@ -248,19 +265,17 @@ def train(args, device, g, dataset, model): ) # create GraphSAGE model - feat_shape = ( - g.get_node_storage(key="feat", ntype="_N") - .fetch(torch.LongTensor([0]).to(device), device=device) - .shape[1] - ) - # no ndata in cugraph storage object + feat_shape = g.ndata["feat"].shape[1] + print(feat_shape) + in_size = feat_shape out_size = dataset.num_classes model = SAGE(in_size, 256, out_size).to(device) # model training print("Training...") - train(args, device, g, dataset, model) + with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: + train(args, device, g, dataset, model, directory) # test the model print("Testing...") diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py index a6f771e4b51..3e0c0454905 100644 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -58,9 +58,8 @@ def inference(self, g, batch_size, device): # The nodes on each layer are of course split in batches. all_node_ids = torch.arange(0, g.num_nodes()).to(device) - feat = g.get_node_storage(key="feat", ntype="_N").fetch( - all_node_ids, device=device - ) + feat = g.ndata["feat"][all_node_ids].to(device) + sampler = dgl.dataloading.MultiLayerFullNeighborSampler( 1, prefetch_node_feats=["feat"] ) @@ -114,15 +113,13 @@ def layerwise_infer(graph, nid, model, batch_size, device): def train_model(model, g, opt, train_dataloader, num_epochs, rank, val_nid): - g.ndata["feat"]["_N"] = g.ndata["feat"]["_N"].to("cuda") - g.ndata["label"]["_N"] = g.ndata["label"]["_N"].to("cuda") st = time.time() model.train() for epoch in range(num_epochs): total_loss = 0 for _, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader): - x = g.ndata["feat"]["_N"][input_nodes] - y = g.ndata["label"]["_N"][output_nodes] + x = g.ndata["feat"][input_nodes].to(torch.float32) + y = g.ndata["label"][output_nodes].to(torch.int64) y_hat = model(blocks, x) y = y.squeeze(1) loss = F.cross_entropy(y_hat, y) @@ -137,7 +134,7 @@ def train_model(model, g, opt, train_dataloader, num_epochs, rank, val_nid): et = time.time() print( f"Total time taken for num_epochs {num_epochs} " - f"with batch_size {train_dataloader._batch_size} = {et-st} s on rank ={rank}" + f"with batch_size {train_dataloader._batch_size} = {et - st} s on rank = {rank}" ) if rank == 0: val_acc = layerwise_infer(g, val_nid, model, 1024 * 5, "cuda") diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow.py deleted file mode 100644 index 474f17dc2bb..00000000000 --- a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import dgl -import torch -import time -from distributed import Client, Event as Dask_Event -import tempfile -from cugraph.dask.comms import comms as Comms - - -def enable_spilling(): - import cudf - - cudf.set_option("spill", True) - - -def setup_cluster(dask_worker_devices): - dask_worker_devices_str = ",".join([str(i) for i in dask_worker_devices]) - from dask_cuda import LocalCUDACluster - - cluster = LocalCUDACluster( - protocol="tcp", - CUDA_VISIBLE_DEVICES=dask_worker_devices_str, - rmm_pool_size="25GB", - ) - - client = Client(cluster) - client.wait_for_workers(n_workers=len(dask_worker_devices)) - client.run(enable_spilling) - print("Dask Cluster Setup Complete") - del client - return cluster - - -def create_dask_client(scheduler_address): - from cugraph.dask.comms import comms as Comms - - client = Client(scheduler_address) - Comms.initialize(p2p=True) - return client - - -def initalize_pytorch_worker(dev_id): - import cupy as cp - import rmm - from rmm.allocators.torch import rmm_torch_allocator - from rmm.allocators.cupy import rmm_cupy_allocator - - dev = cp.cuda.Device( - dev_id - ) # Create cuda context on the right gpu, defaults to gpu-0 - dev.use() - rmm.reinitialize( - pool_allocator=True, - initial_pool_size=10e9, - maximum_pool_size=15e9, - devices=[dev_id], - ) - - if dev_id == 0: - torch.cuda.memory.change_current_allocator(rmm_torch_allocator) - - torch.cuda.set_device(dev_id) - cp.cuda.set_allocator(rmm_cupy_allocator) - enable_spilling() - print("device_id", dev_id, flush=True) - - -def load_dgl_dataset(dataset_name="ogbn-products"): - from ogb.nodeproppred import DglNodePropPredDataset - - dataset = DglNodePropPredDataset(name=dataset_name) - split_idx = dataset.get_idx_split() - train_idx, valid_idx, test_idx = ( - split_idx["train"], - split_idx["valid"], - split_idx["test"], - ) - g, label = dataset[0] - g.ndata["label"] = label - if len(g.etypes) <= 1: - g = dgl.add_self_loop(g) - else: - for etype in g.etypes: - if etype[0] == etype[2]: - # only add self loops for src->dst - g = dgl.add_self_loop(g, etype=etype) - - g = g.int() - train_idx = train_idx.int() - valid_idx = valid_idx.int() - test_idx = test_idx.int() - return g, train_idx, valid_idx, test_idx, dataset.num_classes - - -def create_cugraph_graphstore_from_dgl_dataset( - dataset_name="ogbn-products", single_gpu=False -): - from cugraph_dgl import cugraph_storage_from_heterograph - - dgl_g, train_idx, valid_idx, test_idx, num_classes = load_dgl_dataset(dataset_name) - cugraph_gs = cugraph_storage_from_heterograph(dgl_g, single_gpu=single_gpu) - return cugraph_gs, train_idx, valid_idx, test_idx, num_classes - - -def create_dataloader(gs, train_idx, device): - import cugraph_dgl - - temp_dir = tempfile.TemporaryDirectory() - sampler = cugraph_dgl.dataloading.NeighborSampler([10, 20]) - dataloader = cugraph_dgl.dataloading.DataLoader( - gs, - train_idx, - sampler, - sampling_output_dir=temp_dir.name, - batches_per_partition=10, - device=device, # Put the sampled MFGs on CPU or GPU - use_ddp=True, # Make it work with distributed data parallel - batch_size=1024, - shuffle=False, # Whether to 
shuffle the nodes for every epoch - drop_last=False, - num_workers=0, - ) - return dataloader - - -def run_workflow(rank, devices, scheduler_address): - from model import Sage, train_model - - # Below sets gpu_number - dev_id = devices[rank] - initalize_pytorch_worker(dev_id) - device = torch.device(f"cuda:{dev_id}") - # cugraph dask client initialization - client = create_dask_client(scheduler_address) - - # Pytorch training worker initialization - dist_init_method = "tcp://{master_ip}:{master_port}".format( - master_ip="127.0.0.1", master_port="12346" - ) - - torch.distributed.init_process_group( - backend="nccl", - init_method=dist_init_method, - world_size=len(devices), - rank=rank, - ) - - print(f"rank {rank}.", flush=True) - print("Initalized across GPUs.") - - event = Dask_Event("cugraph_gs_creation_event") - if rank == 0: - ( - gs, - train_idx, - valid_idx, - test_idx, - num_classes, - ) = create_cugraph_graphstore_from_dgl_dataset( - "ogbn-products", single_gpu=False - ) - client.publish_dataset(cugraph_gs=gs) - client.publish_dataset(train_idx=train_idx) - client.publish_dataset(valid_idx=valid_idx) - client.publish_dataset(test_idx=test_idx) - client.publish_dataset(num_classes=num_classes) - event.set() - else: - if event.wait(timeout=1000): - gs = client.get_dataset("cugraph_gs") - train_idx = client.get_dataset("train_idx") - valid_idx = client.get_dataset("valid_idx") - test_idx = client.get_dataset("test_idx") - num_classes = client.get_dataset("num_classes") - else: - raise RuntimeError(f"Fetch cugraph_gs to worker_id {rank} failed") - - torch.distributed.barrier() - print(f"Loading cugraph_store to worker {rank} is complete", flush=True) - dataloader = create_dataloader(gs, train_idx, device) - print("Data Loading Complete", flush=True) - num_feats = gs.ndata["feat"]["_N"].shape[1] - hid_size = 256 - # Load Training example - model = Sage(num_feats, hid_size, num_classes).to(device) - model = torch.nn.parallel.DistributedDataParallel( - model, - device_ids=[device], - output_device=device, - ) - torch.distributed.barrier() - n_epochs = 10 - total_st = time.time() - opt = torch.optim.Adam(model.parameters(), lr=0.01) - train_model(model, gs, opt, dataloader, n_epochs, rank, valid_idx) - torch.distributed.barrier() - total_et = time.time() - print( - f"Total time taken on n_epochs {n_epochs} = {total_et-total_st} s", - f"measured by worker = {rank}", - ) - - # cleanup dask cluster - if rank == 0: - client.unpublish_dataset("cugraph_gs") - client.unpublish_dataset("train_idx") - client.unpublish_dataset("valid_idx") - client.unpublish_dataset("test_idx") - event.clear() - print("Workflow completed") - print("---" * 10) - Comms.destroy() - - -if __name__ == "__main__": - # Load dummy first - # because new environments - # require dataset download - load_dgl_dataset() - dask_worker_devices = [5, 6] - cluster = setup_cluster(dask_worker_devices) - - trainer_devices = [0, 1, 2] - import torch.multiprocessing as mp - - mp.spawn( - run_workflow, - args=(trainer_devices, cluster.scheduler_address), - nprocs=len(trainer_devices), - ) - Comms.destroy() - cluster.close() diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py new file mode 100644 index 00000000000..11afe466014 --- /dev/null +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_mnmg.py @@ -0,0 +1,311 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dgl +import torch +import time +import tempfile +import argparse +import json +import os +import warnings + +from datetime import timedelta + +import cugraph_dgl + +from cugraph.gnn import ( + cugraph_comms_init, + cugraph_comms_shutdown, + cugraph_comms_create_unique_id, +) + +from pylibwholegraph.torch.initialize import ( + init as wm_init, + finalize as wm_finalize, +) + +# Allow computation on objects that are larger than GPU memory +# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory +os.environ["CUDF_SPILL"] = "1" + + +def init_ddp_worker(global_rank, local_rank, world_size, cugraph_id): + import rmm + + rmm.reinitialize( + devices=local_rank, + managed_memory=True, + pool_allocator=True, + ) + + import cupy + + cupy.cuda.Device(local_rank).use() + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + from cugraph.testing.mg_utils import enable_spilling + + enable_spilling() + + torch.cuda.set_device(local_rank) + + cugraph_comms_init( + rank=global_rank, world_size=world_size, uid=cugraph_id, device=local_rank + ) + + wm_init(global_rank, world_size, local_rank, torch.cuda.device_count()) + + +def load_dgl_dataset(dataset_root="dataset", dataset_name="ogbn-products"): + from ogb.nodeproppred import DglNodePropPredDataset + + dataset = DglNodePropPredDataset(root=dataset_root, name=dataset_name) + split_idx = dataset.get_idx_split() + train_idx, valid_idx, test_idx = ( + split_idx["train"], + split_idx["valid"], + split_idx["test"], + ) + g, label = dataset[0] + g.ndata["label"] = label + if len(g.etypes) <= 1: + g = dgl.add_self_loop(g) + else: + for etype in g.etypes: + if etype[0] == etype[2]: + # only add self loops for src->dst + g = dgl.add_self_loop(g, etype=etype) + + g = g.int() + idx = { + "train": train_idx.int(), + "valid": valid_idx.int(), + "test": test_idx.int(), + } + + return g, idx, dataset.num_classes + + +def partition_data( + g, split_idx, num_classes, edge_path, feature_path, label_path, meta_path +): + # Split and save edge index + os.makedirs( + edge_path, + exist_ok=True, + ) + src, dst = g.all_edges(form="uv", order="eid") + edge_index = torch.stack([src, dst]) + for (r, e) in enumerate(torch.tensor_split(edge_index, world_size, dim=1)): + rank_path = os.path.join(edge_path, f"rank={r}.pt") + torch.save( + e.clone(), + rank_path, + ) + + # Split and save features + os.makedirs( + feature_path, + exist_ok=True, + ) + + nix = torch.arange(g.num_nodes()) + for (r, f) in enumerate(torch.tensor_split(nix, world_size)): + feat_path = os.path.join(feature_path, f"rank={r}_feat.pt") + torch.save(g.ndata["feat"][f], feat_path) + + label_f_path = os.path.join(feature_path, f"rank={r}_label.pt") + torch.save(g.ndata["label"][f], label_f_path) + + # Split and save labels + os.makedirs( + label_path, + exist_ok=True, + ) + for (d, i) in split_idx.items(): + i_parts = torch.tensor_split(i, world_size) + for r, i_part in 
enumerate(i_parts): + rank_path = os.path.join(label_path, f"rank={r}") + os.makedirs(rank_path, exist_ok=True) + torch.save(i_part, os.path.join(rank_path, f"{d}.pt")) + + # Save metadata + meta = { + "num_classes": int(num_classes), + "num_nodes": int(g.num_nodes()), + } + with open(meta_path, "w") as f: + json.dump(meta, f) + + +def load_partitioned_data(rank, edge_path, feature_path, label_path, meta_path): + g = cugraph_dgl.Graph( + is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" + ) + + # Load metadata + with open(meta_path, "r") as f: + meta = json.load(f) + + # Load labels + split_idx = {} + for split in ["train", "test", "valid"]: + split_idx[split] = torch.load( + os.path.join(label_path, f"rank={rank}", f"{split}.pt") + ) + + # Load features + feat_t = torch.load(os.path.join(feature_path, f"rank={rank}_feat.pt")) + label_f_t = torch.load(os.path.join(feature_path, f"rank={rank}_label.pt")) + ndata = {"feat": feat_t, "label": label_f_t} + g.add_nodes(meta["num_nodes"], data=ndata) + + # Load edge index + src, dst = torch.load(os.path.join(edge_path, f"rank={rank}.pt")) + g.add_edges(src.cuda(), dst.cuda(), data=None) + + return g, split_idx, meta["num_classes"] + + +def create_dataloader(gs, train_idx, device, temp_dir, stage): + import cugraph_dgl + + temp_path = os.path.join(temp_dir, f"{stage}_{device}") + os.mkdir(temp_path) + + sampler = cugraph_dgl.dataloading.NeighborSampler( + [10, 20], + directory=temp_path, + batches_per_partition=10, + ) + + dataloader = cugraph_dgl.dataloading.FutureDataLoader( + gs, + train_idx, + sampler, + device=device, # Put the sampled MFGs on CPU or GPU + use_ddp=True, # Make it work with distributed data parallel + batch_size=1024, + shuffle=False, # Whether to shuffle the nodes for every epoch + drop_last=False, + num_workers=0, + ) + return dataloader + + +def run_workflow( + global_rank, local_rank, world_size, g, split_idx, num_classes, temp_dir +): + from model import Sage, train_model + + # Below sets gpu_number + dev_id = local_rank + device = torch.device(f"cuda:{dev_id}") + + dataloader = create_dataloader(g, split_idx["train"], device, temp_dir, "train") + print("Dataloader Creation Complete", flush=True) + num_feats = g.ndata["feat"].shape[1] + hid_size = 256 + # Load Training example + model = Sage(num_feats, hid_size, num_classes).to(device) + model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device, + ) + torch.distributed.barrier() + n_epochs = 10 + total_st = time.time() + opt = torch.optim.Adam(model.parameters(), lr=0.01) + train_model(model, g, opt, dataloader, n_epochs, global_rank, split_idx["valid"]) + torch.distributed.barrier() + total_et = time.time() + print( + f"Total time taken on n_epochs {n_epochs} = {total_et - total_st} s", + f"measured by worker = {global_rank}", + ) + + wm_finalize() + cugraph_comms_shutdown() + + +if __name__ == "__main__": + if "LOCAL_RANK" in os.environ: + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--tempdir_root", type=str, default=None) + parser.add_argument("--dataset", type=str, default="ogbn-products") + parser.add_argument("--skip_partition", action="store_true") + args = parser.parse_args() + + torch.distributed.init_process_group( + "nccl", + timeout=timedelta(minutes=60), + ) + world_size = torch.distributed.get_world_size() + global_rank = torch.distributed.get_rank() + local_rank = int(os.environ["LOCAL_RANK"]) + device = 
torch.device(local_rank) + + # Create the uid needed for cuGraph comms + if global_rank == 0: + cugraph_id = [cugraph_comms_create_unique_id()] + else: + cugraph_id = [None] + torch.distributed.broadcast_object_list(cugraph_id, src=0, device=device) + cugraph_id = cugraph_id[0] + + init_ddp_worker(global_rank, local_rank, world_size, cugraph_id) + + # Split the data + edge_path = os.path.join(args.dataset_root, args.dataset + "_eix_part") + feature_path = os.path.join(args.dataset_root, args.dataset + "_fea_part") + label_path = os.path.join(args.dataset_root, args.dataset + "_label_part") + meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json") + + if not args.skip_partition and global_rank == 0: + partition_data( + *load_dgl_dataset(args.dataset_root, args.dataset), + edge_path, + feature_path, + label_path, + meta_path, + ) + torch.distributed.barrier() + + print("loading partitions...") + g, split_idx, num_classes = load_partitioned_data( + rank=global_rank, + edge_path=edge_path, + feature_path=feature_path, + label_path=label_path, + meta_path=meta_path, + ) + print(f"rank {global_rank} has loaded its partition") + torch.distributed.barrier() + + with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: + run_workflow( + global_rank, + local_rank, + world_size, + g, + split_idx, + num_classes, + directory, + ) + else: + warnings.warn("This script should be run with 'torchrun'. Exiting.") diff --git a/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py new file mode 100644 index 00000000000..001d7fb82dc --- /dev/null +++ b/python/cugraph-dgl/examples/multi_trainer_MG_example/workflow_snmg.py @@ -0,0 +1,242 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
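In workflow_mnmg.py above, partition_data shards the edge list, features, and labels with torch.tensor_split and writes one file per rank, which load_partitioned_data later reads back by file name. The save/load round trip in isolation (directory name, tensors, and world size are illustrative):

    import os
    import torch

    world_size = 2
    edge_path = "ogbn-products_eix_part"  # illustrative directory
    os.makedirs(edge_path, exist_ok=True)

    # Save: each rank gets a contiguous slice of the edge index.
    edge_index = torch.stack([torch.arange(8), torch.arange(8).flip(0)])
    for r, e in enumerate(torch.tensor_split(edge_index, world_size, dim=1)):
        torch.save(e.clone(), os.path.join(edge_path, f"rank={r}.pt"))

    # Load: a rank reads only its own shard.
    rank = 0
    src, dst = torch.load(os.path.join(edge_path, f"rank={rank}.pt"))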
+ +import dgl +import torch +import time +import tempfile +import argparse +import os + +import cugraph_dgl + +from cugraph.gnn import ( + cugraph_comms_init, + cugraph_comms_shutdown, + cugraph_comms_create_unique_id, +) + +from pylibwholegraph.torch.initialize import ( + init as wm_init, + finalize as wm_finalize, +) + +# Allow computation on objects that are larger than GPU memory +# https://docs.rapids.ai/api/cudf/stable/developer_guide/library_design/#spilling-to-host-memory +os.environ["CUDF_SPILL"] = "1" + + +def initalize_pytorch_worker(dev_id): + import cupy as cp + import rmm + from rmm.allocators.cupy import rmm_cupy_allocator + + dev = cp.cuda.Device( + dev_id + ) # Create cuda context on the right gpu, defaults to gpu-0 + dev.use() + rmm.reinitialize( + pool_allocator=True, + initial_pool_size=10e9, + maximum_pool_size=15e9, + devices=[dev_id], + ) + + from cugraph.testing.mg_utils import enable_spilling + + enable_spilling() + + torch.cuda.set_device(dev_id) + cp.cuda.set_allocator(rmm_cupy_allocator) + print("device_id", dev_id, flush=True) + + +def load_dgl_dataset( + dataset_name="ogbn-products", + dataset_root=None, +): + from ogb.nodeproppred import DglNodePropPredDataset + + dataset = DglNodePropPredDataset(name=dataset_name, root=dataset_root) + split_idx = dataset.get_idx_split() + train_idx, valid_idx, test_idx = ( + split_idx["train"], + split_idx["valid"], + split_idx["test"], + ) + g, label = dataset[0] + g.ndata["label"] = label + if len(g.etypes) <= 1: + g = dgl.add_self_loop(g) + else: + for etype in g.etypes: + if etype[0] == etype[2]: + # only add self loops for src->dst + g = dgl.add_self_loop(g, etype=etype) + + g = g.int() + train_idx = train_idx.int() + valid_idx = valid_idx.int() + test_idx = test_idx.int() + return g, train_idx, valid_idx, test_idx, dataset.num_classes + + +def create_cugraph_graphstore_from_dgl_dataset(dataset, rank, world_size): + (g, train_idx, valid_idx, test_idx, num_classes) = dataset + # Partition the data + cg = cugraph_dgl.Graph( + is_multi_gpu=True, ndata_storage="wholegraph", edata_storage="wholegraph" + ) + + nix = torch.tensor_split(torch.arange(g.num_nodes()), world_size)[rank] + ndata = {k: g.ndata[k][nix].cuda() for k in g.ndata.keys()} + + eix = torch.tensor_split(torch.arange(g.num_edges()), world_size)[rank] + src, dst = g.all_edges(form="uv", order="eid") + edata = {k: g.edata[k][eix].cuda() for k in g.edata.keys()} + + cg.add_nodes(g.num_nodes(), data=ndata) + cg.add_edges( + torch.tensor_split(src, world_size)[rank].cuda(), + torch.tensor_split(dst, world_size)[rank].cuda(), + data=edata, + ) + + return ( + cg, + torch.tensor_split(train_idx, world_size)[rank].to(torch.int64), + torch.tensor_split(valid_idx, world_size)[rank].to(torch.int64), + torch.tensor_split(test_idx, world_size)[rank].to(torch.int64), + num_classes, + ) + + +def create_dataloader(gs, train_idx, device, temp_dir, stage): + import cugraph_dgl + + temp_path = os.path.join(temp_dir, f"{stage}_{device}") + os.mkdir(temp_path) + + sampler = cugraph_dgl.dataloading.NeighborSampler( + [10, 20], + directory=temp_path, + batches_per_partition=10, + ) + dataloader = cugraph_dgl.dataloading.FutureDataLoader( + gs, + train_idx, + sampler, + device=device, # Put the sampled MFGs on CPU or GPU + use_ddp=True, # Make it work with distributed data parallel + batch_size=1024, + shuffle=False, # Whether to shuffle the nodes for every epoch + drop_last=False, + num_workers=0, + ) + return dataloader + + +def run_workflow(rank, world_size, cugraph_id, dataset, 
temp_dir): + from model import Sage, train_model + + # Below sets gpu_number + dev_id = rank + initalize_pytorch_worker(dev_id) + device = torch.device(f"cuda:{dev_id}") + + # Pytorch training worker initialization + dist_init_method = "tcp://{master_ip}:{master_port}".format( + master_ip="127.0.0.1", master_port="12346" + ) + + torch.distributed.init_process_group( + backend="nccl", + init_method=dist_init_method, + world_size=world_size, + rank=rank, + ) + + cugraph_comms_init(rank=rank, world_size=world_size, uid=cugraph_id, device=rank) + wm_init(rank, world_size, rank, world_size) + + print(f"rank {rank}.", flush=True) + print("Initalized across GPUs.") + + ( + gs, + train_idx, + valid_idx, + test_idx, + num_classes, + ) = create_cugraph_graphstore_from_dgl_dataset( + dataset, + rank, + world_size, + ) + del dataset + + torch.distributed.barrier() + print(f"Loading graph to worker {rank} is complete", flush=True) + + dataloader = create_dataloader(gs, train_idx, device, temp_dir, "train") + print("Dataloader Creation Complete", flush=True) + num_feats = gs.ndata["feat"].shape[1] + hid_size = 256 + # Load Training example + model = Sage(num_feats, hid_size, num_classes).to(device) + model = torch.nn.parallel.DistributedDataParallel( + model, + device_ids=[device], + output_device=device, + ) + torch.distributed.barrier() + n_epochs = 10 + total_st = time.time() + opt = torch.optim.Adam(model.parameters(), lr=0.01) + train_model(model, gs, opt, dataloader, n_epochs, rank, valid_idx) + torch.distributed.barrier() + total_et = time.time() + print( + f"Total time taken on n_epochs {n_epochs} = {total_et - total_st} s", + f"measured by worker = {rank}", + ) + + torch.cuda.synchronize() + wm_finalize() + cugraph_comms_shutdown() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--tempdir_root", type=str, default=None) + parser.add_argument("--dataset", type=str, default="ogbn-products") + args = parser.parse_args() + + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + # Create the uid needed for cuGraph comms + cugraph_id = cugraph_comms_create_unique_id() + + ds = load_dgl_dataset(args.dataset, args.dataset_root) + + world_size = torch.cuda.device_count() + + with tempfile.TemporaryDirectory(dir=args.tempdir_root) as directory: + torch.multiprocessing.spawn( + run_workflow, + args=(world_size, cugraph_id, ds, directory), + nprocs=world_size, + ) diff --git a/python/cugraph-dgl/pyproject.toml b/python/cugraph-dgl/pyproject.toml index ba2bb4bc170..e3e12216ac7 100644 --- a/python/cugraph-dgl/pyproject.toml +++ b/python/cugraph-dgl/pyproject.toml @@ -18,29 +18,29 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", ] dependencies = [ - "cugraph==24.10.*,>=0.0.0a0", + "cugraph==24.12.*,>=0.0.0a0", "numba>=0.57", - "numpy>=1.23,<2.0a0", - "pylibcugraphops==24.10.*,>=0.0.0a0", + "numpy>=1.23,<3.0a0", + "pylibcugraphops==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project.optional-dependencies] test = [ "pandas", - "pylibwholegraph==24.10.*,>=0.0.0a0", + "pylibwholegraph==24.12.*,>=0.0.0a0", "pytest", "pytest-benchmark", "pytest-cov", "pytest-xdist", "scipy", "tensordict>=0.1.2", - "torch>=2.0,<2.2.0a0", + "torch>=2.3,<2.4.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/cugraph-equivariant/cugraph_equivariant/tests/pytest.ini b/python/cugraph-equivariant/cugraph_equivariant/tests/pytest.ini new file mode 100644 index 00000000000..7b0a9f29fb1 --- /dev/null +++ b/python/cugraph-equivariant/cugraph_equivariant/tests/pytest.ini @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native diff --git a/python/cugraph-equivariant/pyproject.toml b/python/cugraph-equivariant/pyproject.toml index e4a8d290d9e..7713e89ac20 100644 --- a/python/cugraph-equivariant/pyproject.toml +++ b/python/cugraph-equivariant/pyproject.toml @@ -28,16 +28,16 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] dependencies = [ - "pylibcugraphops==24.10.*,>=0.0.0a0", + "pylibcugraphops==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] diff --git a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml index bd1ca33af70..39b1ab21edb 100644 --- a/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml +++ b/python/cugraph-pyg/conda/cugraph_pyg_dev_cuda-118.yaml @@ -9,17 +9,17 @@ channels: - conda-forge - nvidia dependencies: -- cugraph==24.10.*,>=0.0.0a0 +- cugraph==24.12.*,>=0.0.0a0 - pandas - pre-commit - pyg>=2.5,<2.6 -- pylibcugraphops==24.10.*,>=0.0.0a0 +- pylibcugraphops==24.12.*,>=0.0.0a0 - pytest - pytest-benchmark - pytest-cov - pytest-xdist - pytorch-cuda==11.8 -- pytorch>=2.0 +- pytorch>=2.3,<2.4.0a0 - scipy - tensordict>=0.1.2 name: cugraph_pyg_dev_cuda-118 diff --git a/python/cugraph-pyg/cugraph_pyg/__init__.py b/python/cugraph-pyg/cugraph_pyg/__init__.py index 719751c966a..e566e6e9fdd 100644 --- a/python/cugraph-pyg/cugraph_pyg/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,3 +12,8 @@ # limitations under the License. 
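The cugraph_pyg/__init__.py hunk just below imports the subpackages eagerly, so they are reachable as attributes after a single top-level import. A small sketch of what this enables (assumes cugraph_pyg is installed):

    import cugraph_pyg

    # Previously these required explicit `import cugraph_pyg.loader`, etc.
    loaders = cugraph_pyg.loader
    stores = cugraph_pyg.data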
from cugraph_pyg._version import __git_commit__, __version__ + +import cugraph_pyg.data +import cugraph_pyg.loader +import cugraph_pyg.sampler +import cugraph_pyg.nn diff --git a/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py index c805cd496c8..6195f3118a4 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/dask_graph_store.py @@ -150,7 +150,7 @@ def is_set(self, key): if key not in self.__dataclass_fields__: raise KeyError(key) attr = getattr(self, key) - return type(attr) != _field_status or attr != _field_status.UNSET + return type(attr) is not _field_status or attr != _field_status.UNSET def is_fully_specified(self): """ diff --git a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py index e086bf07b1f..c47dda5eaa5 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/graph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/graph_store.py @@ -21,7 +21,7 @@ from cugraph.utilities.utils import import_optional, MissingModule from cugraph.gnn.comms import cugraph_comms_get_raft_handle -from typing import Union, Optional, List, Dict +from typing import Union, Optional, List, Dict, Tuple # Have to use import_optional even though these are required @@ -58,13 +58,19 @@ def __init__(self, is_multi_gpu: bool = False): """ self.__edge_indices = tensordict.TensorDict({}, batch_size=(2,)) self.__sizes = {} - self.__graph = None - self.__vertex_offsets = None + self.__handle = None self.__is_multi_gpu = is_multi_gpu + self.__clear_graph() + super().__init__() + def __clear_graph(self): + self.__graph = None + self.__vertex_offsets = None + self.__weight_attr = None + def _put_edge_index( self, edge_index: "torch_geometric.typing.EdgeTensorType", @@ -88,8 +94,7 @@ def _put_edge_index( self.__sizes[edge_attr.edge_type] = edge_attr.size # invalidate the graph - self.__graph = None - self.__vertex_offsets = None + self.__clear_graph() return True def _get_edge_index( @@ -108,7 +113,7 @@ def _remove_edge_index(self, edge_attr: "torch_geometric.data.EdgeAttr") -> bool del self.__edge_indices[edge_attr.edge_type] # invalidate the graph - self.__graph = None + self.__clear_graph() return True def get_all_edge_attrs(self) -> List["torch_geometric.data.EdgeAttr"]: @@ -163,6 +168,9 @@ def _graph(self) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: vertices_array=[vertices_array], edge_id_array=[cupy.asarray(edgelist_dict["eid"])], edge_type_array=[cupy.asarray(edgelist_dict["etp"])], + weight_array=[cupy.asarray(edgelist_dict["wgt"])] + if "wgt" in edgelist_dict + else None, ) else: self.__graph = pylibcugraph.SGGraph( @@ -175,6 +183,9 @@ def _graph(self) -> Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph]: ), edge_id_array=cupy.asarray(edgelist_dict["eid"]), edge_type_array=cupy.asarray(edgelist_dict["etp"]), + weight_array=cupy.asarray(edgelist_dict["wgt"]) + if "wgt" in edgelist_dict + else None, ) return self.__graph @@ -194,13 +205,18 @@ def _num_vertices(self) -> Dict[str, int]: else edge_attr.size[1] ) else: - if edge_attr.edge_type[0] not in num_vertices: + if edge_attr.edge_type[0] != edge_attr.edge_type[2]: + if edge_attr.edge_type[0] not in num_vertices: + num_vertices[edge_attr.edge_type[0]] = int( + self.__edge_indices[edge_attr.edge_type][0].max() + 1 + ) + if edge_attr.edge_type[2] not in num_vertices: + num_vertices[edge_attr.edge_type[2]] = int( + self.__edge_indices[edge_attr.edge_type][1].max() + 1 + ) + elif
edge_attr.edge_type[0] not in num_vertices: num_vertices[edge_attr.edge_type[0]] = int( - self.__edge_indices[edge_attr.edge_type][0].max() + 1 - ) - if edge_attr.edge_type[2] not in num_vertices: - num_vertices[edge_attr.edge_type[1]] = int( - self.__edge_indices[edge_attr.edge_type][1].max() + 1 + self.__edge_indices[edge_attr.edge_type].max() + 1 ) if self.is_multi_gpu: @@ -228,6 +244,32 @@ def _vertex_offsets(self) -> Dict[str, int]: def is_homogeneous(self) -> bool: return len(self._vertex_offsets) == 1 + def _set_weight_attr(self, attr: Tuple["torch_geometric.data.FeatureStore", str]): + if attr != self.__weight_attr: + self.__clear_graph() + self.__weight_attr = attr + + def __get_weight_tensor( + self, + sorted_keys: List[Tuple[str, str, str]], + start_offsets: "torch.Tensor", + num_edges_t: "torch.Tensor", + ): + feature_store, attr_name = self.__weight_attr + + weights = [] + for i, et in enumerate(sorted_keys): + ix = torch.arange( + start_offsets[i], + start_offsets[i] + num_edges_t[i], + dtype=torch.int64, + device="cpu", + ) + + weights.append(feature_store[et, attr_name][ix]) + + return torch.concat(weights) + def __get_edgelist(self): """ Returns @@ -275,59 +317,49 @@ def __get_edgelist(self): ) ) + num_edges_t = torch.tensor( + [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" + ) + if self.is_multi_gpu: rank = torch.distributed.get_rank() world_size = torch.distributed.get_world_size() - num_edges_t = torch.tensor( - [self.__edge_indices[et].shape[1] for et in sorted_keys], device="cuda" - ) num_edges_all_t = torch.empty( world_size, num_edges_t.numel(), dtype=torch.int64, device="cuda" ) torch.distributed.all_gather_into_tensor(num_edges_all_t, num_edges_t) - if rank > 0: - start_offsets = num_edges_all_t[:rank].T.sum(axis=1) - edge_id_array = torch.concat( - [ - torch.arange( - start_offsets[i], - start_offsets[i] + num_edges_all_t[rank][i], - dtype=torch.int64, - device="cuda", - ) - for i in range(len(sorted_keys)) - ] - ) - else: - edge_id_array = torch.concat( - [ - torch.arange( - self.__edge_indices[et].shape[1], - dtype=torch.int64, - device="cuda", - ) - for et in sorted_keys - ] - ) - + start_offsets = num_edges_all_t[:rank].T.sum(axis=1) else: - # single GPU - edge_id_array = torch.concat( - [ - torch.arange( - self.__edge_indices[et].shape[1], - dtype=torch.int64, - device="cuda", - ) - for et in sorted_keys - ] + rank = 0 + start_offsets = torch.zeros( + (len(sorted_keys),), dtype=torch.int64, device="cuda" ) + num_edges_all_t = num_edges_t.reshape((1, num_edges_t.numel())) + + edge_id_array = torch.concat( + [ + torch.arange( + start_offsets[i], + start_offsets[i] + num_edges_all_t[rank][i], + dtype=torch.int64, + device="cuda", + ) + for i in range(len(sorted_keys)) + ] + ) - return { + d = { "dst": edge_index[0], "src": edge_index[1], "etp": edge_type_array, "eid": edge_id_array, } + + if self.__weight_attr is not None: + d["wgt"] = self.__get_weight_tensor( + sorted_keys, start_offsets.cpu(), num_edges_t.cpu() + ).cuda() + + return d diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py index 7002d7ebded..127ca809d91 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_mnmg.py @@ -185,6 +185,8 @@ def run_train( wall_clock_start, tempdir=None, num_layers=3, + in_memory=False, + seeds_per_call=-1, ): optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.0005) @@ -196,20 
+198,23 @@ def run_train( from cugraph_pyg.loader import NeighborLoader ix_train = split_idx["train"].cuda() - train_path = os.path.join(tempdir, f"train_{global_rank}") - os.mkdir(train_path) + train_path = None if in_memory else os.path.join(tempdir, f"train_{global_rank}") + if train_path: + os.mkdir(train_path) train_loader = NeighborLoader( data, input_nodes=ix_train, directory=train_path, shuffle=True, drop_last=True, + local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, **kwargs, ) ix_test = split_idx["test"].cuda() - test_path = os.path.join(tempdir, f"test_{global_rank}") - os.mkdir(test_path) + test_path = None if in_memory else os.path.join(tempdir, f"test_{global_rank}") + if test_path: + os.mkdir(test_path) test_loader = NeighborLoader( data, input_nodes=ix_test, @@ -221,14 +226,16 @@ def run_train( ) ix_valid = split_idx["valid"].cuda() - valid_path = os.path.join(tempdir, f"valid_{global_rank}") - os.mkdir(valid_path) + valid_path = None if in_memory else os.path.join(tempdir, f"valid_{global_rank}") + if valid_path: + os.mkdir(valid_path) valid_loader = NeighborLoader( data, input_nodes=ix_valid, directory=valid_path, shuffle=True, drop_last=True, + local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, **kwargs, ) @@ -347,6 +354,9 @@ def parse_args(): parser.add_argument("--skip_partition", action="store_true") parser.add_argument("--wg_mem_type", type=str, default="distributed") + parser.add_argument("--in_memory", action="store_true", default=False) + parser.add_argument("--seeds_per_call", type=int, default=-1) + return parser.parse_args() @@ -429,6 +439,8 @@ def parse_args(): wall_clock_start, tempdir, args.num_layers, + args.in_memory, + args.seeds_per_call, ) else: warnings.warn("This script should be run with 'torchrun'. 
Exiting.") diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py index b299fc2a1a1..0f9c39bf04d 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_sg.py @@ -66,7 +66,7 @@ def train(epoch: int): torch.cuda.synchronize() print( f"Average Training Iteration Time (s/iter): \ - {(time.perf_counter() - start_avg_time)/(i-warmup_steps):.6f}" + {(time.perf_counter() - start_avg_time) / (i - warmup_steps):.6f}" ) @@ -91,10 +91,20 @@ def test(loader: NeighborLoader, val_steps: Optional[int] = None): def create_loader( - data, num_neighbors, input_nodes, replace, batch_size, samples_dir, stage_name + data, + num_neighbors, + input_nodes, + replace, + batch_size, + samples_dir, + stage_name, + local_seeds_per_call, ): - directory = os.path.join(samples_dir, stage_name) - os.mkdir(directory) + if samples_dir is not None: + directory = os.path.join(samples_dir, stage_name) + os.mkdir(directory) + else: + directory = None return NeighborLoader( data, num_neighbors=num_neighbors, @@ -102,6 +112,7 @@ def create_loader( replace=replace, batch_size=batch_size, directory=directory, + local_seeds_per_call=local_seeds_per_call, ) @@ -147,6 +158,8 @@ def parse_args(): parser.add_argument("--tempdir_root", type=str, default=None) parser.add_argument("--dataset_root", type=str, default="dataset") parser.add_argument("--dataset", type=str, default="ogbn-products") + parser.add_argument("--in_memory", action="store_true", default=False) + parser.add_argument("--seeds_per_call", type=int, default=-1) return parser.parse_args() @@ -170,7 +183,10 @@ def parse_args(): "num_neighbors": [args.fan_out] * args.num_layers, "replace": False, "batch_size": args.batch_size, - "samples_dir": samples_dir, + "samples_dir": None if args.in_memory else samples_dir, + "local_seeds_per_call": None + if args.seeds_per_call <= 0 + else args.seeds_per_call, } train_loader = create_loader( diff --git a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py index b1bb0240e71..73efbc92a24 100644 --- a/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py +++ b/python/cugraph-pyg/cugraph_pyg/examples/gcn_dist_snmg.py @@ -86,6 +86,8 @@ def run_train( wall_clock_start, tempdir=None, num_layers=3, + in_memory=False, + seeds_per_call=-1, ): init_pytorch_worker( @@ -119,20 +121,23 @@ def run_train( dist.barrier() ix_train = torch.tensor_split(split_idx["train"], world_size)[rank].cuda() - train_path = os.path.join(tempdir, f"train_{rank}") - os.mkdir(train_path) + train_path = None if in_memory else os.path.join(tempdir, f"train_{rank}") + if train_path: + os.mkdir(train_path) train_loader = NeighborLoader( (feature_store, graph_store), input_nodes=ix_train, directory=train_path, shuffle=True, drop_last=True, + local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, **kwargs, ) ix_test = torch.tensor_split(split_idx["test"], world_size)[rank].cuda() - test_path = os.path.join(tempdir, f"test_{rank}") - os.mkdir(test_path) + test_path = None if in_memory else os.path.join(tempdir, f"test_{rank}") + if test_path: + os.mkdir(test_path) test_loader = NeighborLoader( (feature_store, graph_store), input_nodes=ix_test, @@ -144,14 +149,16 @@ def run_train( ) ix_valid = torch.tensor_split(split_idx["valid"], world_size)[rank].cuda() - valid_path = os.path.join(tempdir, f"valid_{rank}") - os.mkdir(valid_path) + valid_path = None if 
in_memory else os.path.join(tempdir, f"valid_{rank}") + if valid_path: + os.mkdir(valid_path) valid_loader = NeighborLoader( (feature_store, graph_store), input_nodes=ix_valid, directory=valid_path, shuffle=True, drop_last=True, + local_seeds_per_call=seeds_per_call if seeds_per_call > 0 else None, **kwargs, ) @@ -269,6 +276,8 @@ def run_train( parser.add_argument("--tempdir_root", type=str, default=None) parser.add_argument("--dataset_root", type=str, default="dataset") parser.add_argument("--dataset", type=str, default="ogbn-products") + parser.add_argument("--in_memory", action="store_true", default=False) + parser.add_argument("--seeds_per_call", type=int, default=-1) parser.add_argument( "--n_devices", @@ -322,6 +331,8 @@ def run_train( wall_clock_start, tempdir, args.num_layers, + args.in_memory, + args.seeds_per_call, ), nprocs=world_size, join=True, diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py new file mode 100644 index 00000000000..5c75e01e6f5 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_mnmg.py @@ -0,0 +1,418 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This example illustrates link classification using the ogbl-wikikg2 dataset. + +import os +import json +import argparse +import warnings + +import torch + +import torch.nn.functional as F +from torch.nn import Parameter +from torch_geometric.nn import FastRGCNConv, GAE +from torch.nn.parallel import DistributedDataParallel + +from ogb.linkproppred import PygLinkPropPredDataset + +import cugraph_pyg + +from cugraph.gnn import ( + cugraph_comms_init, + cugraph_comms_create_unique_id, + cugraph_comms_shutdown, +) + +from pylibwholegraph.torch.initialize import ( + init as wm_init, + finalize as wm_finalize, +) + + +# Enable cudf spilling to save gpu memory +from cugraph.testing.mg_utils import enable_spilling + +# Ensures that a CUDA context is not created on import of rapids. 
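# A minimal illustration (assuming cudf is available) of why this assignment
# must come before any RAPIDS import -- importing cudf first would create the
# CUDA context before pytorch has a chance to:
#
#     os.environ["RAPIDS_NO_INITIALIZE"] = "1"  # set first
#     import cudf  # noqa: F401 -- CUDA context creation is now deferred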
+# Allows pytorch to create the context instead +os.environ["RAPIDS_NO_INITIALIZE"] = "1" + + +def init_pytorch_worker(global_rank, local_rank, world_size, uid): + import rmm + + rmm.reinitialize(devices=[local_rank], pool_allocator=True, managed_memory=True) + + import cupy + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + cugraph_comms_init( + global_rank, + world_size, + uid, + local_rank, + ) + + wm_init(global_rank, world_size, local_rank, torch.cuda.device_count()) + + enable_spilling() + + +class RGCNEncoder(torch.nn.Module): + def __init__(self, num_nodes, hidden_channels, num_relations, num_bases=30): + super().__init__() + self.node_emb = Parameter(torch.empty(num_nodes, hidden_channels)) + self.conv1 = FastRGCNConv( + hidden_channels, hidden_channels, num_relations, num_bases=num_bases + ) + self.conv2 = FastRGCNConv( + hidden_channels, hidden_channels, num_relations, num_bases=num_bases + ) + self.reset_parameters() + + def reset_parameters(self): + torch.nn.init.xavier_uniform_(self.node_emb) + self.conv1.reset_parameters() + self.conv2.reset_parameters() + + def forward(self, edge_index, edge_type): + x = self.node_emb + x = self.conv1(x, edge_index, edge_type).relu_() + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv2(x, edge_index, edge_type) + return x + + +def train(epoch, model, optimizer, train_loader, edge_feature_store, num_steps=None): + model.train() + optimizer.zero_grad() + + for i, batch in enumerate(train_loader): + r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() + z = model.encode(batch.edge_index, r) + + loss = model.recon_loss(z, batch.edge_index) + loss.backward() + optimizer.step() + + if i % 10 == 0: + print( + f"Epoch: {epoch:02d}, Iteration: {i:02d}, Loss: {loss:.4f}", flush=True + ) + if num_steps and i == num_steps: + break + + +def test(stage, epoch, model, loader, num_steps=None): + # TODO support ROC-AUC metric + # Predict probabilities of future edges + model.eval() + + rr = 0.0 + for i, (h, h_neg, t, t_neg, r) in enumerate(loader): + if num_steps and i >= num_steps: + break + + ei = torch.concatenate( + [ + torch.stack([h, t]).cuda(), + torch.stack([h_neg.flatten(), t_neg.flatten()]).cuda(), + ], + dim=-1, + ) + + r = torch.concatenate([r, torch.repeat_interleave(r, h_neg.shape[-1])]).cuda() + + z = model.encode(ei, r) + q = model.decode(z, ei) + + _, ix = torch.sort(q, descending=True) + rr += 1.0 / (1.0 + ix[0]) + + print(f"epoch {epoch:02d} {stage} mrr:", rr / i, flush=True) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--hidden_channels", type=int, default=128) + parser.add_argument("--num_layers", type=int, default=1) + parser.add_argument("--lr", type=float, default=0.001) + parser.add_argument("--epochs", type=int, default=4) + parser.add_argument("--batch_size", type=int, default=16384) + parser.add_argument("--num_neg", type=int, default=500) + parser.add_argument("--num_pos", type=int, default=-1) + parser.add_argument("--fan_out", type=int, default=10) + parser.add_argument("--dataset", type=str, default="ogbl-wikikg2") + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--seeds_per_call", type=int, default=-1) + parser.add_argument("--n_devices", type=int, default=-1) + parser.add_argument("--skip_partition", action="store_true") + + return parser.parse_args() + + +def run_train(rank, world_size, model, data, edge_feature_store, meta, splits, args): + model = 
model.to(rank) + model = GAE(DistributedDataParallel(model, device_ids=[rank])) + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) + + eli = torch.stack([splits["train"]["head"], splits["train"]["tail"]]) + + train_loader = cugraph_pyg.loader.LinkNeighborLoader( + data, + [args.fan_out] * args.num_layers, + edge_label_index=eli, + local_seeds_per_call=args.seeds_per_call if args.seeds_per_call > 0 else None, + batch_size=args.batch_size, + shuffle=True, + drop_last=True, + ) + + def get_eval_loader(stage: str): + head = splits[stage]["head"] + tail = splits[stage]["tail"] + + head_neg = splits[stage]["head_neg"][:, : args.num_neg] + tail_neg = splits[stage]["tail_neg"][:, : args.num_neg] + + rel = splits[stage]["relation"] + + return torch.utils.data.DataLoader( + torch.utils.data.TensorDataset( + head.pin_memory(), + head_neg.pin_memory(), + tail.pin_memory(), + tail_neg.pin_memory(), + rel.pin_memory(), + ), + batch_size=1, + shuffle=False, + drop_last=True, + ) + + test_loader = get_eval_loader("test") + valid_loader = get_eval_loader("valid") + + num_train_steps = (args.num_pos // args.batch_size) if args.num_pos > 0 else 100 + + for epoch in range(1, 1 + args.epochs): + train( + epoch, + model, + optimizer, + train_loader, + edge_feature_store, + num_steps=num_train_steps, + ) + test("validation", epoch, model, valid_loader, num_steps=1024) + + test("test", epoch, model, test_loader, num_steps=1024) + + wm_finalize() + cugraph_comms_shutdown() + + +def partition_data( + data, splits, meta, edge_path, rel_path, pos_path, neg_path, meta_path +): + # Split and save edge index + os.makedirs( + edge_path, + exist_ok=True, + ) + for (r, e) in enumerate(torch.tensor_split(data.edge_index, world_size, dim=1)): + rank_path = os.path.join(edge_path, f"rank={r}.pt") + torch.save( + e.clone(), + rank_path, + ) + + # Split and save edge reltypes + os.makedirs( + rel_path, + exist_ok=True, + ) + for (r, f) in enumerate(torch.tensor_split(data.edge_reltype, world_size)): + rank_path = os.path.join(rel_path, f"rank={r}.pt") + torch.save( + f.clone(), + rank_path, + ) + + # Split and save positive edges + os.makedirs( + pos_path, + exist_ok=True, + ) + for stage in ["train", "test", "valid"]: + for (r, n) in enumerate( + torch.tensor_split( + torch.stack([splits[stage]["head"], splits[stage]["tail"]]), + world_size, + dim=-1, + ) + ): + rank_path = os.path.join(pos_path, f"rank={r}_{stage}.pt") + torch.save( + n.clone(), + rank_path, + ) + + # Split and save negative edges + os.makedirs( + neg_path, + exist_ok=True, + ) + for stage in ["test", "valid"]: + for (r, n) in enumerate( + torch.tensor_split( + torch.stack([splits[stage]["head_neg"], splits[stage]["tail_neg"]]), + world_size, + dim=1, + ) + ): + rank_path = os.path.join(neg_path, f"rank={r}_{stage}.pt") + torch.save(n.clone(), rank_path) + for (r, n) in enumerate( + torch.tensor_split(splits[stage]["relation"], world_size, dim=-1) + ): + rank_path = os.path.join(neg_path, f"rank={r}_{stage}_relation.pt") + torch.save(n.clone(), rank_path) + + with open(meta_path, "w") as f: + json.dump(meta, f) + + +def load_partitioned_data(rank, edge_path, rel_path, pos_path, neg_path, meta_path): + from cugraph_pyg.data import GraphStore, WholeFeatureStore, TensorDictFeatureStore + + graph_store = GraphStore() + feature_store = TensorDictFeatureStore() + edge_feature_store = WholeFeatureStore() + + # Load edge index + graph_store[("n", "e", "n"), "coo"] = torch.load( + os.path.join(edge_path, f"rank={rank}.pt") + ) + + # Load edge 
rel type + edge_feature_store[("n", "e", "n"), "rel"] = torch.load( + os.path.join(rel_path, f"rank={rank}.pt") + ) + + splits = {} + + # Load positive edges + for stage in ["train", "test", "valid"]: + head, tail = torch.load(os.path.join(pos_path, f"rank={rank}_{stage}.pt")) + splits[stage] = { + "head": head, + "tail": tail, + } + + # Load negative edges + for stage in ["test", "valid"]: + head_neg, tail_neg = torch.load( + os.path.join(neg_path, f"rank={rank}_{stage}.pt") + ) + relation = torch.load( + os.path.join(neg_path, f"rank={rank}_{stage}_relation.pt") + ) + splits[stage]["head_neg"] = head_neg + splits[stage]["tail_neg"] = tail_neg + splits[stage]["relation"] = relation + + with open(meta_path, "r") as f: + meta = json.load(f) + + return (feature_store, graph_store), edge_feature_store, splits, meta + + +if __name__ == "__main__": + args = parse_args() + + if "LOCAL_RANK" in os.environ: + torch.distributed.init_process_group("nccl") + world_size = torch.distributed.get_world_size() + global_rank = torch.distributed.get_rank() + local_rank = int(os.environ["LOCAL_RANK"]) + device = torch.device(local_rank) + + # Create the uid needed for cuGraph comms + if global_rank == 0: + cugraph_id = [cugraph_comms_create_unique_id()] + else: + cugraph_id = [None] + torch.distributed.broadcast_object_list(cugraph_id, src=0, device=device) + cugraph_id = cugraph_id[0] + + init_pytorch_worker(global_rank, local_rank, world_size, cugraph_id) + + # Split the data + edge_path = os.path.join(args.dataset_root, args.dataset + "_eix_part") + rel_path = os.path.join(args.dataset_root, args.dataset + "_rel_part") + pos_path = os.path.join(args.dataset_root, args.dataset + "_e_pos_part") + neg_path = os.path.join(args.dataset_root, args.dataset + "_e_neg_part") + meta_path = os.path.join(args.dataset_root, args.dataset + "_meta.json") + + if not args.skip_partition and global_rank == 0: + data = PygLinkPropPredDataset(args.dataset, root=args.dataset_root) + dataset = data[0] + + splits = data.get_edge_split() + + meta = {} + meta["num_nodes"] = int(dataset.num_nodes) + meta["num_rels"] = int(dataset.edge_reltype.max()) + 1 + + partition_data( + dataset, + splits, + meta, + edge_path=edge_path, + rel_path=rel_path, + pos_path=pos_path, + neg_path=neg_path, + meta_path=meta_path, + ) + del data + del dataset + del splits + torch.distributed.barrier() + + # Load partitions + data, edge_feature_store, splits, meta = load_partitioned_data( + rank=global_rank, + edge_path=edge_path, + rel_path=rel_path, + pos_path=pos_path, + neg_path=neg_path, + meta_path=meta_path, + ) + torch.distributed.barrier() + + model = RGCNEncoder( + meta["num_nodes"], + hidden_channels=args.hidden_channels, + num_relations=meta["num_rels"], + ) + + run_train( + global_rank, world_size, model, data, edge_feature_store, meta, splits, args + ) + else: + warnings.warn("This script should be run with 'torchrun'. Exiting.") diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py new file mode 100644 index 00000000000..67d7eecc7c2 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_sg.py @@ -0,0 +1,219 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This example illustrates link classification using the ogbl-wikikg2 dataset. + +import argparse + +from typing import Tuple, Dict, Any + +import torch +import cupy + +import rmm +from rmm.allocators.cupy import rmm_cupy_allocator +from rmm.allocators.torch import rmm_torch_allocator + +# Must change allocators immediately upon import +# or else other imports will cause memory to be +# allocated and prevent changing the allocator +rmm.reinitialize(devices=[0], pool_allocator=True, managed_memory=True) +cupy.cuda.set_allocator(rmm_cupy_allocator) +torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + +import torch.nn.functional as F # noqa: E402 +from torch.nn import Parameter # noqa: E402 +from torch_geometric.nn import FastRGCNConv, GAE # noqa: E402 +import torch_geometric # noqa: E402 +import cugraph_pyg # noqa: E402 + +# Enable cudf spilling to save gpu memory +from cugraph.testing.mg_utils import enable_spilling # noqa: E402 + +enable_spilling() + + +class RGCNEncoder(torch.nn.Module): + def __init__(self, num_nodes, hidden_channels, num_relations, num_bases=30): + super().__init__() + self.node_emb = Parameter(torch.empty(num_nodes, hidden_channels)) + self.conv1 = FastRGCNConv( + hidden_channels, hidden_channels, num_relations, num_bases=num_bases + ) + self.conv2 = FastRGCNConv( + hidden_channels, hidden_channels, num_relations, num_bases=num_bases + ) + self.reset_parameters() + + def reset_parameters(self): + torch.nn.init.xavier_uniform_(self.node_emb) + self.conv1.reset_parameters() + self.conv2.reset_parameters() + + def forward(self, edge_index, edge_type): + x = self.node_emb + x = self.conv1(x, edge_index, edge_type).relu_() + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv2(x, edge_index, edge_type) + return x + + +def load_data( + dataset_str, dataset_root: str +) -> Tuple[ + Tuple["torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore"], + "torch_geometric.data.FeatureStore", + Dict[str, Dict[str, "torch.Tensor"]], + Dict[str, Any], +]: + from ogb.linkproppred import PygLinkPropPredDataset + + data = PygLinkPropPredDataset(dataset_str, root=dataset_root) + dataset = data[0] + + splits = data.get_edge_split() + + from cugraph_pyg.data import GraphStore, TensorDictFeatureStore + + graph_store = GraphStore() + feature_store = TensorDictFeatureStore() + edge_feature_store = TensorDictFeatureStore() + meta = {} + + graph_store[("n", "e", "n"), "coo"] = dataset.edge_index + edge_feature_store[("n", "e", "n"), "rel"] = dataset.edge_reltype.pin_memory() + meta["num_nodes"] = dataset.num_nodes + meta["num_rels"] = dataset.edge_reltype.max() + 1 + + return (feature_store, graph_store), edge_feature_store, splits, meta + + +def train(epoch, model, optimizer, train_loader, edge_feature_store): + model.train() + optimizer.zero_grad() + + for i, batch in enumerate(train_loader): + r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() + z = model.encode(batch.edge_index, r) + + loss = model.recon_loss(z, batch.edge_index) + loss.backward() + optimizer.step() + + if i % 10 == 0: + print(f"Epoch: {epoch:02d}, 
Iteration: {i:02d}, Loss: {loss:.4f}") + if i == 100: + break + + +def test(stage, epoch, model, loader, num_steps=None): + # TODO support ROC-AUC metric + # Predict probabilities of future edges + model.eval() + + rr = 0.0 + for i, (h, h_neg, t, t_neg, r) in enumerate(loader): + if num_steps and i >= num_steps: + break + + ei = torch.concatenate( + [ + torch.stack([h, t]).cuda(), + torch.stack([h_neg.flatten(), t_neg.flatten()]).cuda(), + ], + dim=-1, + ) + + r = torch.concatenate([r, torch.repeat_interleave(r, h_neg.shape[-1])]).cuda() + + z = model.encode(ei, r) + q = model.decode(z, ei) + + _, ix = torch.sort(q, descending=True) + rr += 1.0 / (1.0 + ix[0]) + + print(f"epoch {epoch:02d} {stage} mrr:", rr / i) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--hidden_channels", type=int, default=128) + parser.add_argument("--num_layers", type=int, default=1) + parser.add_argument("--lr", type=float, default=0.001) + parser.add_argument("--epochs", type=int, default=4) + parser.add_argument("--batch_size", type=int, default=16384) + parser.add_argument("--num_neg", type=int, default=500) + parser.add_argument("--fan_out", type=int, default=10) + parser.add_argument("--dataset", type=str, default="ogbl-wikikg2") + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--seeds_per_call", type=int, default=-1) + + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + data, edge_feature_store, splits, meta = load_data(args.dataset, args.dataset_root) + + model = GAE( + RGCNEncoder( + meta["num_nodes"], + hidden_channels=args.hidden_channels, + num_relations=meta["num_rels"], + ) + ).cuda() + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) + + train_loader = cugraph_pyg.loader.LinkNeighborLoader( + data, + [args.fan_out] * args.num_layers, + edge_label_index=torch.stack( + [splits["train"]["head"], splits["train"]["tail"]] + ), + local_seeds_per_call=args.seeds_per_call if args.seeds_per_call > 0 else None, + batch_size=args.batch_size, + shuffle=True, + drop_last=True, + ) + + def get_eval_loader(stage: str): + head = splits[stage]["head"] + tail = splits[stage]["tail"] + + head_neg = splits[stage]["head_neg"][:, : args.num_neg] + tail_neg = splits[stage]["tail_neg"][:, : args.num_neg] + + rel = splits[stage]["relation"] + + return torch.utils.data.DataLoader( + torch.utils.data.TensorDataset( + head.pin_memory(), + head_neg.pin_memory(), + tail.pin_memory(), + tail_neg.pin_memory(), + rel.pin_memory(), + ), + batch_size=1, + shuffle=False, + drop_last=True, + ) + + test_loader = get_eval_loader("test") + valid_loader = get_eval_loader("valid") + + for epoch in range(1, 1 + args.epochs): + train(epoch, model, optimizer, train_loader, edge_feature_store) + test("validation", epoch, model, valid_loader, num_steps=1024) + + test("test", epoch, model, test_loader, num_steps=1024) diff --git a/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py new file mode 100644 index 00000000000..2c0ae53a08e --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/examples/rgcn_link_class_snmg.py @@ -0,0 +1,320 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This example illustrates link classification using the ogbl-wikikg2 dataset. + +import os +import argparse +import warnings + +from typing import Tuple, Any + +import torch + +import torch.nn.functional as F +from torch.nn import Parameter +from torch_geometric.nn import FastRGCNConv, GAE +from torch.nn.parallel import DistributedDataParallel + +import torch_geometric +import cugraph_pyg + +from cugraph.gnn import ( + cugraph_comms_init, + cugraph_comms_create_unique_id, + cugraph_comms_shutdown, +) + +from pylibwholegraph.torch.initialize import ( + init as wm_init, + finalize as wm_finalize, +) + + +# Enable cudf spilling to save gpu memory +from cugraph.testing.mg_utils import enable_spilling + +# Ensures that a CUDA context is not created on import of rapids. +# Allows pytorch to create the context instead +os.environ["RAPIDS_NO_INITIALIZE"] = "1" + + +def init_pytorch_worker(rank, world_size, uid): + import rmm + + rmm.reinitialize(devices=[rank], pool_allocator=True, managed_memory=True) + + import cupy + from rmm.allocators.cupy import rmm_cupy_allocator + + cupy.cuda.set_allocator(rmm_cupy_allocator) + + cugraph_comms_init( + rank, + world_size, + uid, + rank, + ) + + wm_init(rank, world_size, rank, world_size) + + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + torch.distributed.init_process_group( + "nccl", + rank=rank, + world_size=world_size, + ) + + enable_spilling() + + +class RGCNEncoder(torch.nn.Module): + def __init__(self, num_nodes, hidden_channels, num_relations, num_bases=30): + super().__init__() + self.node_emb = Parameter(torch.empty(num_nodes, hidden_channels)) + self.conv1 = FastRGCNConv( + hidden_channels, hidden_channels, num_relations, num_bases=num_bases + ) + self.conv2 = FastRGCNConv( + hidden_channels, hidden_channels, num_relations, num_bases=num_bases + ) + self.reset_parameters() + + def reset_parameters(self): + torch.nn.init.xavier_uniform_(self.node_emb) + self.conv1.reset_parameters() + self.conv2.reset_parameters() + + def forward(self, edge_index, edge_type): + x = self.node_emb + x = self.conv1(x, edge_index, edge_type).relu_() + x = F.dropout(x, p=0.2, training=self.training) + x = self.conv2(x, edge_index, edge_type) + return x + + +def load_data( + rank: int, + world_size: int, + data: Any, +) -> Tuple[ + Tuple["torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore"], + "torch_geometric.data.FeatureStore", +]: + from cugraph_pyg.data import GraphStore, WholeFeatureStore, TensorDictFeatureStore + + graph_store = GraphStore() + feature_store = TensorDictFeatureStore() # empty fs required by PyG + edge_feature_store = WholeFeatureStore() + + graph_store[("n", "e", "n"), "coo"] = torch.tensor_split( + data.edge_index.cuda(), world_size, dim=1 + )[rank] + + edge_feature_store[("n", "e", "n"), "rel"] = torch.tensor_split( + data.edge_reltype.cuda(), + world_size, + )[rank] + + return (feature_store, graph_store), edge_feature_store + + +def train(epoch, model, optimizer, train_loader, edge_feature_store, num_steps=None): + model.train() + optimizer.zero_grad() + + for i, batch in 
enumerate(train_loader): + r = edge_feature_store[("n", "e", "n"), "rel"][batch.e_id].flatten().cuda() + z = model.encode(batch.edge_index, r) + + loss = model.recon_loss(z, batch.edge_index) + loss.backward() + optimizer.step() + + if i % 10 == 0: + print( + f"Epoch: {epoch:02d}, Iteration: {i:02d}, Loss: {loss:.4f}", flush=True + ) + if num_steps and i == num_steps: + break + + +def test(stage, epoch, model, loader, num_steps=None): + # TODO support ROC-AUC metric + # Predict probabilities of future edges + model.eval() + + rr = 0.0 + for i, (h, h_neg, t, t_neg, r) in enumerate(loader): + if num_steps and i >= num_steps: + break + + ei = torch.concatenate( + [ + torch.stack([h, t]).cuda(), + torch.stack([h_neg.flatten(), t_neg.flatten()]).cuda(), + ], + dim=-1, + ) + + r = torch.concatenate([r, torch.repeat_interleave(r, h_neg.shape[-1])]).cuda() + + z = model.encode(ei, r) + q = model.decode(z, ei) + + _, ix = torch.sort(q, descending=True) + rr += 1.0 / (1.0 + ix[0]) + + print(f"epoch {epoch:02d} {stage} mrr:", rr / i, flush=True) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--hidden_channels", type=int, default=128) + parser.add_argument("--num_layers", type=int, default=1) + parser.add_argument("--lr", type=float, default=0.001) + parser.add_argument("--epochs", type=int, default=4) + parser.add_argument("--batch_size", type=int, default=16384) + parser.add_argument("--num_neg", type=int, default=500) + parser.add_argument("--num_pos", type=int, default=-1) + parser.add_argument("--fan_out", type=int, default=10) + parser.add_argument("--dataset", type=str, default="ogbl-wikikg2") + parser.add_argument("--dataset_root", type=str, default="dataset") + parser.add_argument("--seeds_per_call", type=int, default=-1) + parser.add_argument("--n_devices", type=int, default=-1) + + return parser.parse_args() + + +def run_train(rank, world_size, uid, model, data, meta, splits, args): + init_pytorch_worker( + rank, + world_size, + uid, + ) + + model = model.to(rank) + model = GAE(DistributedDataParallel(model, device_ids=[rank])) + optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) + + data, edge_feature_store = load_data(rank, world_size, data) + + eli = torch.stack( + [ + torch.tensor_split(splits["train"]["head"], world_size)[rank], + torch.tensor_split(splits["train"]["tail"], world_size)[rank], + ] + ) + + train_loader = cugraph_pyg.loader.LinkNeighborLoader( + data, + [args.fan_out] * args.num_layers, + edge_label_index=eli, + local_seeds_per_call=args.seeds_per_call if args.seeds_per_call > 0 else None, + batch_size=args.batch_size, + shuffle=True, + drop_last=True, + ) + + def get_eval_loader(stage: str): + head = torch.tensor_split(splits[stage]["head"], world_size)[rank] + tail = torch.tensor_split(splits[stage]["tail"], world_size)[rank] + + head_neg = torch.tensor_split( + splits[stage]["head_neg"][:, : args.num_neg], world_size + )[rank] + tail_neg = torch.tensor_split( + splits[stage]["tail_neg"][:, : args.num_neg], world_size + )[rank] + + rel = torch.tensor_split(splits[stage]["relation"], world_size)[rank] + + return torch.utils.data.DataLoader( + torch.utils.data.TensorDataset( + head.pin_memory(), + head_neg.pin_memory(), + tail.pin_memory(), + tail_neg.pin_memory(), + rel.pin_memory(), + ), + batch_size=1, + shuffle=False, + drop_last=True, + ) + + test_loader = get_eval_loader("test") + valid_loader = get_eval_loader("valid") + + num_train_steps = (args.num_pos // args.batch_size) if args.num_pos > 0 else 100 + + for epoch in 
range(1, 1 + args.epochs): + train( + epoch, + model, + optimizer, + train_loader, + edge_feature_store, + num_steps=num_train_steps, + ) + test("validation", epoch, model, valid_loader, num_steps=1024) + + test("test", epoch, model, test_loader, num_steps=1024) + + wm_finalize() + cugraph_comms_shutdown() + + +if __name__ == "__main__": + if "CI_RUN" in os.environ and os.environ["CI_RUN"] == "1": + warnings.warn("Skipping SNMG example in CI due to memory limit") + else: + args = parse_args() + + # change the allocator before any allocations are made + from rmm.allocators.torch import rmm_torch_allocator + + torch.cuda.memory.change_current_allocator(rmm_torch_allocator) + + # import ogb here to stop it from creating a context and breaking pytorch/rmm + from ogb.linkproppred import PygLinkPropPredDataset + + data = PygLinkPropPredDataset(args.dataset, root=args.dataset_root) + dataset = data[0] + + splits = data.get_edge_split() + + meta = {} + meta["num_nodes"] = dataset.num_nodes + meta["num_rels"] = dataset.edge_reltype.max() + 1 + + model = RGCNEncoder( + meta["num_nodes"], + hidden_channels=args.hidden_channels, + num_relations=meta["num_rels"], + ) + + print("Data =", data) + if args.n_devices == -1: + world_size = torch.cuda.device_count() + else: + world_size = args.n_devices + print("Using", world_size, "GPUs...") + + uid = cugraph_comms_create_unique_id() + torch.multiprocessing.spawn( + run_train, + (world_size, uid, model, data, meta, splits, args), + nprocs=world_size, + join=True, + ) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/__init__.py b/python/cugraph-pyg/cugraph_pyg/loader/__init__.py index cad66aaa183..c804b3d1f97 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/__init__.py @@ -16,6 +16,9 @@ from cugraph_pyg.loader.node_loader import NodeLoader from cugraph_pyg.loader.neighbor_loader import NeighborLoader +from cugraph_pyg.loader.link_loader import LinkLoader +from cugraph_pyg.loader.link_neighbor_loader import LinkNeighborLoader + from cugraph_pyg.loader.dask_node_loader import DaskNeighborLoader from cugraph_pyg.loader.dask_node_loader import BulkSampleLoader diff --git a/python/cugraph-pyg/cugraph_pyg/loader/link_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/link_loader.py new file mode 100644 index 00000000000..77e2ac4f99d --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/loader/link_loader.py @@ -0,0 +1,205 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + +import cugraph_pyg +from typing import Union, Tuple, Callable, Optional + +from cugraph.utilities.utils import import_optional + +torch_geometric = import_optional("torch_geometric") +torch = import_optional("torch") + + +class LinkLoader: + """ + Duck-typed version of torch_geometric.loader.LinkLoader. + Loads samples from batches of input edges using a + `~cugraph_pyg.sampler.BaseSampler.sample_from_edges` + function. 
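    Example (a minimal usage sketch; assumes `feature_store` and `graph_store`
    are already-populated cuGraph-PyG stores and `sampler` is a
    cugraph_pyg.sampler.BaseSampler wrapping a cuGraph distributed sampler):

        loader = LinkLoader(
            (feature_store, graph_store),
            link_sampler=sampler,
            edge_label_index=edge_label_index,  # shape [2, num_seed_edges]
            batch_size=16384,
            shuffle=True,
            drop_last=True,
        )
        for batch in loader:
            ...  # one PyG Data object per minibatch of seed edges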
+ """ + + def __init__( + self, + data: Union[ + "torch_geometric.data.Data", + "torch_geometric.data.HeteroData", + Tuple[ + "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" + ], + ], + link_sampler: "cugraph_pyg.sampler.BaseSampler", + edge_label_index: "torch_geometric.typing.InputEdges" = None, + edge_label: "torch_geometric.typing.OptTensor" = None, + edge_label_time: "torch_geometric.typing.OptTensor" = None, + neg_sampling: Optional["torch_geometric.sampler.NegativeSampling"] = None, + neg_sampling_ratio: Optional[Union[int, float]] = None, + transform: Optional[Callable] = None, + transform_sampler_output: Optional[Callable] = None, + filter_per_worker: Optional[bool] = None, + custom_cls: Optional["torch_geometric.data.HeteroData"] = None, + input_id: "torch_geometric.typing.OptTensor" = None, + batch_size: int = 1, # refers to number of edges in batch + shuffle: bool = False, + drop_last: bool = False, + **kwargs, + ): + """ + Parameters + ---------- + data: Data, HeteroData, or Tuple[FeatureStore, GraphStore] + See torch_geometric.loader.NodeLoader. + link_sampler: BaseSampler + See torch_geometric.loader.LinkLoader. + edge_label_index: InputEdges + See torch_geometric.loader.LinkLoader. + edge_label: OptTensor + See torch_geometric.loader.LinkLoader. + edge_label_time: OptTensor + See torch_geometric.loader.LinkLoader. + neg_sampling: Optional[NegativeSampling] + Type of negative sampling to perform, if desired. + See torch_geometric.loader.LinkLoader. + neg_sampling_ratio: Optional[Union[int, float]] + Negative sampling ratio. Affects how many negative + samples are generated. + See torch_geometric.loader.LinkLoader. + transform: Callable (optional, default=None) + This argument currently has no effect. + transform_sampler_output: Callable (optional, default=None) + This argument currently has no effect. + filter_per_worker: bool (optional, default=False) + This argument currently has no effect. + custom_cls: HeteroData + This argument currently has no effect. This loader will + always return a Data or HeteroData object. + input_id: OptTensor + See torch_geometric.loader.LinkLoader. + + """ + if not isinstance(data, (list, tuple)) or not isinstance( + data[1], cugraph_pyg.data.GraphStore + ): + # Will eventually automatically convert these objects to cuGraph objects. + raise NotImplementedError("Currently can't accept non-cugraph graphs") + + if not isinstance(link_sampler, cugraph_pyg.sampler.BaseSampler): + raise NotImplementedError("Must provide a cuGraph sampler") + + if edge_label_time is not None: + raise ValueError("Temporal sampling is currently unsupported") + + if filter_per_worker: + warnings.warn("filter_per_worker is currently ignored") + + if custom_cls is not None: + warnings.warn("custom_cls is currently ignored") + + if transform is not None: + warnings.warn("transform is currently ignored.") + + if transform_sampler_output is not None: + warnings.warn("transform_sampler_output is currently ignored.") + + if neg_sampling_ratio is not None: + warnings.warn( + "The 'neg_sampling_ratio' argument is deprecated in PyG" + " and is not supported in cuGraph-PyG." 
+ ) + + neg_sampling = torch_geometric.sampler.NegativeSampling.cast(neg_sampling) + + ( + input_type, + edge_label_index, + ) = torch_geometric.loader.utils.get_edge_label_index( + data, + (None, edge_label_index), + ) + + self.__input_data = torch_geometric.sampler.EdgeSamplerInput( + input_id=torch.arange( + edge_label_index[0].numel(), dtype=torch.int64, device="cuda" + ) + if input_id is None + else input_id, + row=edge_label_index[0], + col=edge_label_index[1], + label=edge_label, + time=edge_label_time, + input_type=input_type, + ) + + # Edge label check from torch_geometric.loader.LinkLoader + if ( + neg_sampling is not None + and neg_sampling.is_binary() + and edge_label is not None + and edge_label.min() == 0 + ): + edge_label = edge_label + 1 + + if ( + neg_sampling is not None + and neg_sampling.is_triplet() + and edge_label is not None + ): + raise ValueError( + "'edge_label' needs to be undefined for " + "'triplet'-based negative sampling. Please use " + "`src_index`, `dst_pos_index` and " + "`neg_pos_index` of the returned mini-batch " + "instead to differentiate between positive and " + "negative samples." + ) + + self.__data = data + + self.__link_sampler = link_sampler + self.__neg_sampling = neg_sampling + + self.__batch_size = batch_size + self.__shuffle = shuffle + self.__drop_last = drop_last + + def __iter__(self): + if self.__shuffle: + perm = torch.randperm(self.__input_data.row.numel()) + else: + perm = torch.arange(self.__input_data.row.numel()) + + if self.__drop_last: + d = perm.numel() % self.__batch_size + perm = perm[:-d] if d > 0 else perm + + input_data = torch_geometric.sampler.EdgeSamplerInput( + input_id=self.__input_data.input_id[perm], + row=self.__input_data.row[perm], + col=self.__input_data.col[perm], + label=None + if self.__input_data.label is None + else self.__input_data.label[perm], + time=None + if self.__input_data.time is None + else self.__input_data.time[perm], + input_type=self.__input_data.input_type, + ) + + return cugraph_pyg.sampler.SampleIterator( + self.__data, + self.__link_sampler.sample_from_edges( + input_data, + neg_sampling=self.__neg_sampling, + ), + ) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py new file mode 100644 index 00000000000..080565368c4 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/loader/link_neighbor_loader.py @@ -0,0 +1,243 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
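The `__iter__` above shuffles the flat list of seed edges and trims any trailing partial batch before handing indices to the sampler. A standalone sketch of that bookkeeping, with hypothetical sizes:

    import torch

    batch_size = 4
    perm = torch.randperm(10)            # 10 seed edges, shuffled
    d = perm.numel() % batch_size        # 2 seeds left over
    perm = perm[:-d] if d > 0 else perm  # drop_last: discard the partial batch
    batches = perm.view(-1, batch_size)  # two full batches of four seeds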
+ +import warnings + +from typing import Union, Tuple, Optional, Callable, List, Dict + +import cugraph_pyg +from cugraph_pyg.loader import LinkLoader +from cugraph_pyg.sampler import BaseSampler + +from cugraph.gnn import NeighborSampler, DistSampleWriter +from cugraph.utilities.utils import import_optional + +torch_geometric = import_optional("torch_geometric") + + +class LinkNeighborLoader(LinkLoader): + """ + Duck-typed version of torch_geometric.loader.LinkNeighborLoader + + Link loader that implements the neighbor sampling + algorithm used in GraphSAGE. + """ + + def __init__( + self, + data: Union[ + "torch_geometric.data.Data", + "torch_geometric.data.HeteroData", + Tuple[ + "torch_geometric.data.FeatureStore", "torch_geometric.data.GraphStore" + ], + ], + num_neighbors: Union[ + List[int], Dict["torch_geometric.typing.EdgeType", List[int]] + ], + edge_label_index: "torch_geometric.typing.InputEdges" = None, + edge_label: "torch_geometric.typing.OptTensor" = None, + edge_label_time: "torch_geometric.typing.OptTensor" = None, + replace: bool = False, + subgraph_type: Union[ + "torch_geometric.typing.SubgraphType", str + ] = "directional", + disjoint: bool = False, + temporal_strategy: str = "uniform", + neg_sampling: Optional["torch_geometric.sampler.NegativeSampling"] = None, + neg_sampling_ratio: Optional[Union[int, float]] = None, + time_attr: Optional[str] = None, + weight_attr: Optional[str] = None, + transform: Optional[Callable] = None, + transform_sampler_output: Optional[Callable] = None, + is_sorted: bool = False, + filter_per_worker: Optional[bool] = None, + neighbor_sampler: Optional["torch_geometric.sampler.NeighborSampler"] = None, + directed: bool = True, # Deprecated. + batch_size: int = 16, # Refers to number of edges per batch. + directory: Optional[str] = None, + batches_per_partition=256, + format: str = "parquet", + compression: Optional[str] = None, + local_seeds_per_call: Optional[int] = None, + **kwargs, + ): + """ + data: Data, HeteroData, or Tuple[FeatureStore, GraphStore] + See torch_geometric.loader.LinkNeighborLoader. + num_neighbors: List[int] or Dict[EdgeType, List[int]] + Fanout values. + See torch_geometric.loader.LinkNeighborLoader. + edge_label_index: InputEdges + Input edges for sampling. + See torch_geometric.loader.LinkNeighborLoader. + edge_label: OptTensor + Labels for input edges. + See torch_geometric.loader.LinkNeighborLoader. + edge_label_time: OptTensor + Time attribute for input edges. + See torch_geometric.loader.LinkNeighborLoader. + replace: bool (optional, default=False) + Whether to sample with replacement. + See torch_geometric.loader.LinkNeighborLoader. + subgraph_type: Union[SubgraphType, str] (optional, default='directional') + The type of subgraph to return. + Currently only 'directional' is supported. + See torch_geometric.loader.LinkNeighborLoader. + disjoint: bool (optional, default=False) + Whether to perform disjoint sampling. + Currently unsupported. + See torch_geometric.loader.LinkNeighborLoader. + temporal_strategy: str (optional, default='uniform') + Currently only 'uniform' is supported. + See torch_geometric.loader.LinkNeighborLoader. + time_attr: str (optional, default=None) + Used for temporal sampling. + See torch_geometric.loader.LinkNeighborLoader. + weight_attr: str (optional, default=None) + Used for biased sampling. + See torch_geometric.loader.LinkNeighborLoader. + transform: Callable (optional, default=None) + See torch_geometric.loader.LinkNeighborLoader. 
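A usage sketch for the `weight_attr` parameter described above (assumes per-edge weights were registered on the feature store under the hypothetical name "weight", for the same edge type held by the graph store):

    feature_store[("n", "e", "n"), "weight"] = edge_weights  # one float per edge

    loader = LinkNeighborLoader(
        (feature_store, graph_store),
        num_neighbors=[10, 10],
        edge_label_index=edge_label_index,
        weight_attr="weight",  # neighbors are drawn proportionally to weight
        batch_size=16384,
    )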
+ transform_sampler_output: Callable (optional, default=None) + See torch_geometric.loader.LinkNeighborLoader. + is_sorted: bool (optional, default=False) + Ignored by cuGraph. + See torch_geometric.loader.LinkNeighborLoader. + filter_per_worker: bool (optional, default=False) + Currently ignored by cuGraph, but this may + change once in-memory sampling is implemented. + See torch_geometric.loader.LinkNeighborLoader. + neighbor_sampler: torch_geometric.sampler.NeighborSampler + (optional, default=None) + Not supported by cuGraph. + See torch_geometric.loader.LinkNeighborLoader. + directed: bool (optional, default=True) + Deprecated. + See torch_geometric.loader.LinkNeighborLoader. + batch_size: int (optional, default=16) + The number of input edges per output minibatch. + See torch.utils.dataloader. + directory: str (optional, default=None) + The directory where samples will be temporarily stored, + if spilling samples to disk. If None, this loader + will perform buffered in-memory sampling. + If writing to disk, setting this argument + to a tempfile.TemporaryDirectory with a context + manager is a good option but depending on the filesystem, + you may want to choose an alternative location with fast I/O + instead. + See cugraph.gnn.DistSampleWriter. + batches_per_partition: int (optional, default=256) + The number of batches per partition if writing samples to + disk. Manually tuning this parameter is not recommended + but reducing it may help conserve GPU memory. + See cugraph.gnn.DistSampleWriter. + format: str (optional, default='parquet') + If writing samples to disk, they will be written in this + file format. + See cugraph.gnn.DistSampleWriter. + compression: str (optional, default=None) + The compression type to use if writing samples to disk. + If not provided, it is automatically chosen. + local_seeds_per_call: int (optional, default=None) + The number of seeds to process within a single sampling call. + Manually tuning this parameter is not recommended but reducing + it may conserve GPU memory. The total number of seeds processed + per sampling call is equal to the sum of this parameter across + all workers. If not provided, it will be automatically + calculated. + See cugraph.gnn.DistSampler. + **kwargs + Other keyword arguments passed to the superclass. + """ + + subgraph_type = torch_geometric.sampler.base.SubgraphType(subgraph_type) + + if not directed: + subgraph_type = torch_geometric.sampler.base.SubgraphType.induced + warnings.warn( + "The 'directed' argument is deprecated. " + "Use subgraph_type='induced' instead." + ) + if subgraph_type != torch_geometric.sampler.base.SubgraphType.directional: + raise ValueError("Only directional subgraphs are currently supported") + if disjoint: + raise ValueError("Disjoint sampling is currently unsupported") + if temporal_strategy != "uniform": + warnings.warn("Only the uniform temporal strategy is currently supported") + if neighbor_sampler is not None: + raise ValueError("Passing a neighbor sampler is currently unsupported") + if time_attr is not None: + raise ValueError("Temporal sampling is currently unsupported") + if is_sorted: + warnings.warn("The 'is_sorted' argument is ignored by cuGraph.") + if not isinstance(data, (list, tuple)) or not isinstance( + data[1], cugraph_pyg.data.GraphStore + ): + # Will eventually automatically convert these objects to cuGraph objects. 
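# For reference, the only `data` form currently accepted is a tuple of
# cuGraph-PyG stores, as in the examples above (a sketch):
#
#     from cugraph_pyg.data import GraphStore, TensorDictFeatureStore
#     data = (TensorDictFeatureStore(), GraphStore())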
+ raise NotImplementedError("Currently can't accept non-cugraph graphs") + + if compression is None: + compression = "CSR" + elif compression not in ["CSR", "COO"]: + raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')") + + writer = ( + None + if directory is None + else DistSampleWriter( + directory=directory, + batches_per_partition=batches_per_partition, + format=format, + ) + ) + + feature_store, graph_store = data + + if weight_attr is not None: + graph_store._set_weight_attr((feature_store, weight_attr)) + + sampler = BaseSampler( + NeighborSampler( + graph_store._graph, + writer, + retain_original_seeds=True, + fanout=num_neighbors, + prior_sources_behavior="exclude", + deduplicate_sources=True, + compression=compression, + compress_per_hop=False, + with_replacement=replace, + local_seeds_per_call=local_seeds_per_call, + biased=(weight_attr is not None), + ), + (feature_store, graph_store), + batch_size=batch_size, + ) + # TODO add heterogeneous support and pass graph_store._vertex_offsets + + super().__init__( + (feature_store, graph_store), + sampler, + edge_label_index=edge_label_index, + edge_label=edge_label, + edge_label_time=edge_label_time, + neg_sampling=neg_sampling, + neg_sampling_ratio=neg_sampling_ratio, + transform=transform, + transform_sampler_output=transform_sampler_output, + filter_per_worker=filter_per_worker, + batch_size=batch_size, + **kwargs, + ) diff --git a/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py index 1199895e99d..1da2c6dc381 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/neighbor_loader.py @@ -12,7 +12,6 @@ # limitations under the License. import warnings -import tempfile from typing import Union, Tuple, Optional, Callable, List, Dict @@ -20,7 +19,7 @@ from cugraph_pyg.loader import NodeLoader from cugraph_pyg.sampler import BaseSampler -from cugraph.gnn import UniformNeighborSampler, DistSampleWriter +from cugraph.gnn import NeighborSampler, DistSampleWriter from cugraph.utilities.utils import import_optional torch_geometric = import_optional("torch_geometric") @@ -63,7 +62,7 @@ def __init__( neighbor_sampler: Optional["torch_geometric.sampler.NeighborSampler"] = None, directed: bool = True, # Deprecated. batch_size: int = 16, - directory: str = None, + directory: Optional[str] = None, batches_per_partition=256, format: str = "parquet", compression: Optional[str] = None, @@ -123,14 +122,14 @@ def __init__( The number of input nodes per output minibatch. See torch.utils.dataloader. directory: str (optional, default=None) - The directory where samples will be temporarily stored. - It is recommend that this be set by the user, usually - setting it to a tempfile.TemporaryDirectory with a context + The directory where samples will be temporarily stored, + if spilling samples to disk. If None, this loader + will perform buffered in-memory sampling. + If writing to disk, setting this argument + to a tempfile.TemporaryDirectory with a context manager is a good option but depending on the filesystem, you may want to choose an alternative location with fast I/O instead. - If not set, this will create a TemporaryDirectory that will - persist until this object is garbage collected. See cugraph.gnn.DistSampleWriter. 
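A sketch of the two modes the `directory` argument selects (assumes `data` and `ix` are defined as in the GCN examples above):

    # Buffered in-memory sampling: the default when directory=None.
    loader = NeighborLoader(data, num_neighbors=[10, 10], input_nodes=ix)

    # Spill samples to disk as parquet partitions instead.
    import tempfile

    with tempfile.TemporaryDirectory() as d:
        loader = NeighborLoader(
            data, num_neighbors=[10, 10], input_nodes=ix, directory=d
        )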
batches_per_partition: int (optional, default=256) The number of batches per partition if writing samples to @@ -174,8 +173,6 @@ def __init__( raise ValueError("Passing a neighbor sampler is currently unsupported") if time_attr is not None: raise ValueError("Temporal sampling is currently unsupported") - if weight_attr is not None: - raise ValueError("Biased sampling is currently unsupported") if is_sorted: warnings.warn("The 'is_sorted' argument is ignored by cuGraph.") if not isinstance(data, (list, tuple)) or not isinstance( @@ -184,25 +181,28 @@ def __init__( # Will eventually automatically convert these objects to cuGraph objects. raise NotImplementedError("Currently can't accept non-cugraph graphs") - if directory is None: - warnings.warn("Setting a directory to store samples is recommended.") - self._tempdir = tempfile.TemporaryDirectory() - directory = self._tempdir.name - if compression is None: compression = "CSR" elif compression not in ["CSR", "COO"]: raise ValueError("Invalid value for compression (expected 'CSR' or 'COO')") - writer = DistSampleWriter( - directory=directory, - batches_per_partition=batches_per_partition, - format=format, + writer = ( + None + if directory is None + else DistSampleWriter( + directory=directory, + batches_per_partition=batches_per_partition, + format=format, + ) ) feature_store, graph_store = data + + if weight_attr is not None: + graph_store._set_weight_attr((feature_store, weight_attr)) + sampler = BaseSampler( - UniformNeighborSampler( + NeighborSampler( graph_store._graph, writer, retain_original_seeds=True, @@ -213,6 +213,7 @@ def __init__( compress_per_hop=False, with_replacement=replace, local_seeds_per_call=local_seeds_per_call, + biased=(weight_attr is not None), ), (feature_store, graph_store), batch_size=batch_size, diff --git a/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py b/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py index 49923783d6b..4b236f75885 100644 --- a/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/loader/node_loader.py @@ -110,8 +110,10 @@ def __init__( input_id, ) - self.__input_data = torch_geometric.loader.node_loader.NodeSamplerInput( - input_id=input_id, + self.__input_data = torch_geometric.sampler.NodeSamplerInput( + input_id=torch.arange(len(input_nodes), dtype=torch.int64, device="cuda") + if input_id is None + else input_id, node=input_nodes, time=None, input_type=input_type, @@ -135,10 +137,8 @@ def __iter__(self): d = perm.numel() % self.__batch_size perm = perm[:-d] - input_data = torch_geometric.loader.node_loader.NodeSamplerInput( - input_id=None - if self.__input_data.input_id is None - else self.__input_data.input_id[perm], + input_data = torch_geometric.sampler.NodeSamplerInput( + input_id=self.__input_data.input_id[perm], node=self.__input_data.node[perm], time=None if self.__input_data.time is None diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py index d877b856ad6..bc3d4fd8d3c 100644 --- a/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py +++ b/python/cugraph-pyg/cugraph_pyg/sampler/sampler.py @@ -14,7 +14,9 @@ from typing import Optional, Iterator, Union, Dict, Tuple from cugraph.utilities.utils import import_optional -from cugraph.gnn import DistSampler, DistSampleReader +from cugraph.gnn import DistSampler + +from .sampler_utils import filter_cugraph_pyg_store, neg_sample, neg_cat torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") @@ -58,13 
+60,31 @@ def __next__(self): next_sample = next(self.__output_iter) if isinstance(next_sample, torch_geometric.sampler.SamplerOutput): sz = next_sample.edge.numel() - if sz == next_sample.col.numel(): + if sz == next_sample.col.numel() and ( + next_sample.node.numel() > next_sample.col[-1] + ): + # This will only trigger on very small batches and will have minimal + # performance impact. If COO output is removed, then this condition + # can be avoided. col = next_sample.col else: col = torch_geometric.edge_index.ptr2index( next_sample.col, next_sample.edge.numel() ) + data = filter_cugraph_pyg_store( + self.__feature_store, + self.__graph_store, + next_sample.node, + next_sample.row, + col, + next_sample.edge, + None, + ) + + """ + # TODO Re-enable this once PyG resolves + # the issue with edge features (9566) data = torch_geometric.loader.utils.filter_custom_store( self.__feature_store, self.__graph_store, @@ -74,6 +94,7 @@ def __next__(self): next_sample.edge, None, ) + """ if "n_id" not in data: data.n_id = next_sample.node @@ -85,10 +106,20 @@ def __next__(self): data.num_sampled_nodes = next_sample.num_sampled_nodes data.num_sampled_edges = next_sample.num_sampled_edges - data.input_id = data.batch - data.seed_time = None + data.input_id = next_sample.metadata[0] data.batch_size = data.input_id.size(0) + if len(next_sample.metadata) == 2: + data.seed_time = next_sample.metadata[1] + elif len(next_sample.metadata) == 4: + ( + data.edge_label_index, + data.edge_label, + data.seed_time, + ) = next_sample.metadata[1:] + else: + raise ValueError("Invalid metadata") + elif isinstance(next_sample, torch_geometric.sampler.HeteroSamplerOutput): col = {} for edge_type, col_idx in next_sample.col: @@ -136,13 +167,15 @@ class SampleReader: Iterator that processes results from the cuGraph distributed sampler. """ - def __init__(self, base_reader: DistSampleReader): + def __init__( + self, base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] + ): """ Constructs a new SampleReader. Parameters ---------- - base_reader: DistSampleReader + base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] The reader responsible for loading saved samples produced by the cuGraph distributed sampler. """ @@ -157,6 +190,9 @@ def __next__(self): self.__base_reader ) + self.__raw_sample_data["input_offsets"] -= self.__raw_sample_data[ + "input_offsets" + ][0].clone() self.__raw_sample_data["label_hop_offsets"] -= self.__raw_sample_data[ "label_hop_offsets" ][0].clone() @@ -186,14 +222,16 @@ class HomogeneousSampleReader(SampleReader): produced by the cuGraph distributed sampler. """ - def __init__(self, base_reader: DistSampleReader): + def __init__( + self, base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] + ): """ Constructs a new HomogeneousSampleReader Parameters ---------- - base_reader: DistSampleReader - The reader responsible for loading saved samples produced by + base_reader: Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]] + The iterator responsible for loading saved samples produced by the cuGraph distributed sampler. 
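A toy view of the offset rebasing performed as each chunk of saved samples is read (hypothetical values): offsets stored on disk are global to the chunk, so they are shifted to start at zero before per-batch slices are taken:

    import torch

    input_offsets = torch.tensor([100, 116, 132])  # as read from a chunk
    input_offsets -= input_offsets[0].clone()      # -> tensor([0, 16, 32])
    batch_0 = slice(int(input_offsets[0]), int(input_offsets[1]))  # first 16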
""" super().__init__(base_reader) @@ -246,14 +284,61 @@ def __decode_csc(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): [num_seeds, num_sampled_nodes_hops.diff(prepend=num_seeds)] ) + input_index = raw_sample_data["input_index"][ + raw_sample_data["input_offsets"][index] : raw_sample_data["input_offsets"][ + index + 1 + ] + ] + + num_seeds = input_index.numel() + input_index = input_index[input_index >= 0] + + num_pos = input_index.numel() + num_neg = num_seeds - num_pos + if num_neg > 0: + edge_label = torch.concat( + [ + torch.full((num_pos,), 1.0), + torch.full((num_neg,), 0.0), + ] + ) + else: + edge_label = None + + edge_inverse = ( + ( + raw_sample_data["edge_inverse"][ + (raw_sample_data["input_offsets"][index] * 2) : ( + raw_sample_data["input_offsets"][index + 1] * 2 + ) + ] + ) + if "edge_inverse" in raw_sample_data + else None + ) + + if edge_inverse is None: + metadata = ( + input_index, + None, # TODO this will eventually include time + ) + else: + metadata = ( + input_index, + edge_inverse.view(2, -1), + edge_label, + None, # TODO this will eventually include time + ) + return torch_geometric.sampler.SamplerOutput( node=renumber_map.cpu(), row=minors, col=major_offsets, - edge=edge_id, + edge=edge_id.cpu(), batch=renumber_map[:num_seeds], num_sampled_nodes=num_sampled_nodes.cpu(), num_sampled_edges=num_sampled_edges.cpu(), + metadata=metadata, ) def __decode_coo(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): @@ -299,6 +384,37 @@ def __decode_coo(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): [num_seeds, num_sampled_nodes_hops.diff(prepend=num_seeds)] ) + input_index = raw_sample_data["input_index"][ + raw_sample_data["input_offsets"][index] : raw_sample_data["input_offsets"][ + index + 1 + ] + ] + + edge_inverse = ( + ( + raw_sample_data["edge_inverse"][ + (raw_sample_data["input_offsets"][index] * 2) : ( + raw_sample_data["input_offsets"][index + 1] * 2 + ) + ] + ) + if "edge_inverse" in raw_sample_data + else None + ) + + if edge_inverse is None: + metadata = ( + input_index, + None, # TODO this will eventually include time + ) + else: + metadata = ( + input_index, + edge_inverse.view(2, -1), + None, + None, # TODO this will eventually include time + ) + return torch_geometric.sampler.SamplerOutput( node=renumber_map.cpu(), row=minors, @@ -307,6 +423,7 @@ def __decode_coo(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): batch=renumber_map[:num_seeds], num_sampled_nodes=num_sampled_nodes, num_sampled_edges=num_sampled_edges, + metadata=metadata, ) def _decode(self, raw_sample_data: Dict[str, "torch.Tensor"], index: int): @@ -337,8 +454,8 @@ def sample_from_nodes( "torch_geometric.sampler.SamplerOutput", ] ]: - self.__sampler.sample_from_nodes( - index.node, batch_size=self.__batch_size, **kwargs + reader = self.__sampler.sample_from_nodes( + index.node, batch_size=self.__batch_size, input_id=index.input_id, **kwargs ) edge_attrs = self.__graph_store.get_all_edge_attrs() @@ -346,7 +463,7 @@ def sample_from_nodes( len(edge_attrs) == 1 and edge_attrs[0].edge_type[0] == edge_attrs[0].edge_type[2] ): - return HomogeneousSampleReader(self.__sampler.get_reader()) + return HomogeneousSampleReader(reader) else: # TODO implement heterogeneous sampling raise NotImplementedError( @@ -365,4 +482,59 @@ def sample_from_edges( "torch_geometric.sampler.SamplerOutput", ] ]: - raise NotImplementedError("Edge sampling is currently unimplemented.") + src = index.row + dst = index.col + input_id = index.input_id + neg_batch_size = 0 + 
if neg_sampling: + # Sample every negative subset at once. + # TODO handle temporal sampling (node_time) + src_neg, dst_neg = neg_sample( + self.__graph_store, + index.row, + index.col, + self.__batch_size, + neg_sampling, + None, # src_time, + None, # src_node_time, + ) + if neg_sampling.is_binary(): + src, _ = neg_cat(src.cuda(), src_neg, self.__batch_size) + else: + # triplet, cat dst to src so length is the same; will + # result in the same set of unique vertices + src, _ = neg_cat(src.cuda(), dst_neg, self.__batch_size) + dst, neg_batch_size = neg_cat(dst.cuda(), dst_neg, self.__batch_size) + + # Concatenate -1s so the input id tensor lines up and can + # be processed by the dist sampler. + # When loading the output batch, '-1' will be dropped. + input_id, _ = neg_cat( + input_id, + torch.full( + (dst_neg.numel(),), -1, dtype=torch.int64, device=input_id.device + ), + self.__batch_size, + ) + + # TODO for temporal sampling, node times have to be + # adjusted here. + reader = self.__sampler.sample_from_edges( + torch.stack([src, dst]), # reverse of usual convention + input_id=input_id, + batch_size=self.__batch_size + neg_batch_size, + **kwargs, + ) + + edge_attrs = self.__graph_store.get_all_edge_attrs() + if ( + len(edge_attrs) == 1 + and edge_attrs[0].edge_type[0] == edge_attrs[0].edge_type[2] + ): + return HomogeneousSampleReader(reader) + else: + # TODO implement heterogeneous sampling + raise NotImplementedError( + "Sampling heterogeneous graphs is currently" + " unsupported in the non-dask API" + ) diff --git a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py index c3e19393970..b3d56ef9992 100644 --- a/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py +++ b/python/cugraph-pyg/cugraph_pyg/sampler/sampler_utils.py @@ -14,10 +14,14 @@ from typing import Sequence, Dict, Tuple -from cugraph_pyg.data import DaskGraphStore +from math import ceil + +from cugraph_pyg.data import GraphStore, DaskGraphStore from cugraph.utilities.utils import import_optional import cudf +import cupy +import pylibcugraph dask_cudf = import_optional("dask_cudf") torch_geometric = import_optional("torch_geometric") @@ -403,3 +407,125 @@ def _sampler_output_from_sampling_results_heterogeneous( num_sampled_edges={k: t.tolist() for k, t in num_edges_per_hop_dict.items()}, metadata=metadata, ) + + +def filter_cugraph_pyg_store( + feature_store, + graph_store, + node, + row, + col, + edge, + clx, +) -> "torch_geometric.data.Data": + data = torch_geometric.data.Data() + + data.edge_index = torch.stack([row, col], dim=0) + + required_attrs = [] + for attr in feature_store.get_all_tensor_attrs(): + attr.index = edge if isinstance(attr.group_name, tuple) else node + required_attrs.append(attr) + data.num_nodes = attr.index.size(0) + + tensors = feature_store.multi_get_tensor(required_attrs) + for i, attr in enumerate(required_attrs): + data[attr.attr_name] = tensors[i] + + return data + + +def neg_sample( + graph_store: GraphStore, + seed_src: "torch.Tensor", + seed_dst: "torch.Tensor", + batch_size: int, + neg_sampling: "torch_geometric.sampler.NegativeSampling", + time: "torch.Tensor", + node_time: "torch.Tensor", +) -> Tuple["torch.Tensor", "torch.Tensor"]: + try: + # Compatibility for PyG 2.5 + src_weight = neg_sampling.src_weight + dst_weight = neg_sampling.dst_weight + except AttributeError: + src_weight = neg_sampling.weight + dst_weight = neg_sampling.weight + unweighted = src_weight is None and dst_weight is None + + # Require at least one 
negative edge per batch + num_neg = max( + int(ceil(neg_sampling.amount * seed_src.numel())), + int(ceil(seed_src.numel() / batch_size)), + ) + + if graph_store.is_multi_gpu: + num_neg_global = torch.tensor([num_neg], device="cuda") + torch.distributed.all_reduce(num_neg_global, op=torch.distributed.ReduceOp.SUM) + num_neg = int(num_neg_global) + else: + num_neg_global = num_neg + + if node_time is None: + result_dict = pylibcugraph.negative_sampling( + graph_store._resource_handle, + graph_store._graph, + num_neg_global, + vertices=None + if unweighted + else cupy.arange(src_weight.numel(), dtype="int64"), + src_bias=None if src_weight is None else cupy.asarray(src_weight), + dst_bias=None if dst_weight is None else cupy.asarray(dst_weight), + remove_duplicates=False, + remove_false_negatives=False, + exact_number_of_samples=True, + do_expensive_check=False, + ) + + src_neg = torch.as_tensor(result_dict["sources"], device="cuda")[:num_neg] + dst_neg = torch.as_tensor(result_dict["destinations"], device="cuda")[:num_neg] + + # TODO modify the C API so this condition is impossible + if src_neg.numel() < num_neg: + num_gen = num_neg - src_neg.numel() + src_neg = torch.concat( + [ + src_neg, + torch.randint( + 0, src_neg.max(), (num_gen,), device="cuda", dtype=torch.int64 + ), + ] + ) + dst_neg = torch.concat( + [ + dst_neg, + torch.randint( + 0, dst_neg.max(), (num_gen,), device="cuda", dtype=torch.int64 + ), + ] + ) + return src_neg, dst_neg + raise NotImplementedError( + "Temporal negative sampling is currently unimplemented in cuGraph-PyG" + ) + + +def neg_cat( + seed_pos: "torch.Tensor", seed_neg: "torch.Tensor", pos_batch_size: int +) -> Tuple["torch.Tensor", int]: + num_seeds = seed_pos.numel() + num_batches = int(ceil(num_seeds / pos_batch_size)) + neg_batch_size = int(ceil(seed_neg.numel() / num_batches)) + + batch_pos_offsets = torch.full((num_batches,), pos_batch_size).cumsum(-1)[:-1] + seed_pos_splits = torch.tensor_split(seed_pos, batch_pos_offsets) + + batch_neg_offsets = torch.full((num_batches,), neg_batch_size).cumsum(-1)[:-1] + seed_neg_splits = torch.tensor_split(seed_neg, batch_neg_offsets) + + return ( + torch.concatenate( + [torch.concatenate(s) for s in zip(seed_pos_splits, seed_neg_splits)] + ), + neg_batch_size, + ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py index 8edb5276953..8ee18a826f7 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader.py @@ -16,6 +16,7 @@ from cugraph.datasets import karate from cugraph.utilities.utils import import_optional, MissingModule +import cugraph_pyg from cugraph_pyg.data import TensorDictFeatureStore, GraphStore from cugraph_pyg.loader import NeighborLoader @@ -46,9 +47,150 @@ def test_neighbor_loader(): (feature_store, graph_store), [5, 5], input_nodes=torch.arange(34), - directory=".", ) for batch in loader: assert isinstance(batch, torch_geometric.data.Data) assert (feature_store["person", "feat"][batch.n_id] == batch.feat).all() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.sg +def test_neighbor_loader_biased(): + eix = torch.tensor( + [ + [3, 4, 5], + [0, 1, 2], + ] + ) + + graph_store = GraphStore() + graph_store.put_edge_index(eix, ("person", "knows", "person"), "coo") + + feature_store = TensorDictFeatureStore() + feature_store["person", "feat"] = torch.randint(128, 
(6, 12)) + feature_store[("person", "knows", "person"), "bias"] = torch.tensor( + [0, 12, 14], dtype=torch.float32 + ) + + loader = NeighborLoader( + (feature_store, graph_store), + [1], + input_nodes=torch.tensor([0, 1, 2], dtype=torch.int64), + batch_size=3, + weight_attr="bias", + ) + + out = list(iter(loader)) + assert len(out) == 1 + out = out[0] + + assert out.edge_index.shape[1] == 2 + assert (out.edge_index.cpu() == torch.tensor([[3, 4], [1, 2]])).all() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.sg +@pytest.mark.parametrize("num_nodes", [10, 25]) +@pytest.mark.parametrize("num_edges", [64, 128]) +@pytest.mark.parametrize("batch_size", [2, 4]) +@pytest.mark.parametrize("select_edges", [16, 32]) +@pytest.mark.parametrize("depth", [1, 3]) +@pytest.mark.parametrize("num_neighbors", [1, 4]) +def test_link_neighbor_loader_basic( + num_nodes, num_edges, batch_size, select_edges, num_neighbors, depth +): + graph_store = GraphStore() + feature_store = TensorDictFeatureStore() + + eix = torch.randperm(num_edges)[:select_edges] + graph_store[("n", "e", "n"), "coo"] = torch.stack( + [ + torch.randint(0, num_nodes, (num_edges,)), + torch.randint(0, num_nodes, (num_edges,)), + ] + ) + + elx = graph_store[("n", "e", "n"), "coo"][:, eix] + loader = cugraph_pyg.loader.LinkNeighborLoader( + (feature_store, graph_store), + num_neighbors=[num_neighbors] * depth, + edge_label_index=elx, + batch_size=batch_size, + shuffle=False, + ) + + elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) + for i, batch in enumerate(loader): + assert ( + batch.input_id.cpu() == torch.arange(i * batch_size, (i + 1) * batch_size) + ).all() + assert (elx[i] == batch.n_id[batch.edge_label_index.cpu()]).all() + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.sg +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_link_neighbor_loader_negative_sampling_basic(batch_size): + num_edges = 62 + num_nodes = 19 + select_edges = 17 + + graph_store = GraphStore() + feature_store = TensorDictFeatureStore() + + eix = torch.randperm(num_edges)[:select_edges] + graph_store[("n", "e", "n"), "coo"] = torch.stack( + [ + torch.randint(0, num_nodes, (num_edges,)), + torch.randint(0, num_nodes, (num_edges,)), + ] + ) + + elx = graph_store[("n", "e", "n"), "coo"][:, eix] + loader = cugraph_pyg.loader.LinkNeighborLoader( + (feature_store, graph_store), + num_neighbors=[3, 3, 3], + edge_label_index=elx, + batch_size=batch_size, + neg_sampling="binary", + shuffle=False, + ) + + elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) + for i, batch in enumerate(loader): + assert batch.edge_label[0] == 1.0 + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.sg +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_link_neighbor_loader_negative_sampling_uneven(batch_size): + num_edges = 62 + num_nodes = 19 + select_edges = 17 + + graph_store = GraphStore() + feature_store = TensorDictFeatureStore() + + eix = torch.randperm(num_edges)[:select_edges] + graph_store[("n", "e", "n"), "coo"] = torch.stack( + [ + torch.randint(0, num_nodes, (num_edges,)), + torch.randint(0, num_nodes, (num_edges,)), + ] + ) + + elx = graph_store[("n", "e", "n"), "coo"][:, eix] + loader = cugraph_pyg.loader.LinkNeighborLoader( + (feature_store, graph_store), + num_neighbors=[3, 3, 3], + edge_label_index=elx, + batch_size=batch_size, + 
neg_sampling=torch_geometric.sampler.NegativeSampling("binary", amount=0.1), + shuffle=False, + ) + + elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) + for i, batch in enumerate(loader): + assert batch.edge_label[0] == 1.0 diff --git a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py index 6a5f46b0940..d1dee01a508 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/loader/test_neighbor_loader_mg.py @@ -19,7 +19,7 @@ from cugraph.utilities.utils import import_optional, MissingModule from cugraph_pyg.data import TensorDictFeatureStore, GraphStore -from cugraph_pyg.loader import NeighborLoader +from cugraph_pyg.loader import NeighborLoader, LinkNeighborLoader from cugraph.gnn import ( cugraph_comms_init, @@ -27,6 +27,8 @@ cugraph_comms_create_unique_id, ) +os.environ["RAPIDS_NO_INITIALIZE"] = "1" + torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") @@ -36,6 +38,7 @@ def init_pytorch_worker(rank, world_size, cugraph_id): rmm.reinitialize( devices=rank, + pool_allocator=False, ) import cupy @@ -93,6 +96,7 @@ def run_test_neighbor_loader_mg(rank, uid, world_size, specify_size): cugraph_comms_shutdown() +@pytest.mark.skip(reason="deleteme") @pytest.mark.parametrize("specify_size", [True, False]) @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") @pytest.mark.mg @@ -109,3 +113,252 @@ def test_neighbor_loader_mg(specify_size): ), nprocs=world_size, ) + + +def run_test_neighbor_loader_biased_mg(rank, uid, world_size): + init_pytorch_worker(rank, world_size, uid) + + eix = torch.stack( + [ + torch.arange( + 3 * (world_size + rank), + 3 * (world_size + rank + 1), + dtype=torch.int64, + device="cuda", + ), + torch.arange(3 * rank, 3 * (rank + 1), dtype=torch.int64, device="cuda"), + ] + ) + + graph_store = GraphStore(is_multi_gpu=True) + graph_store.put_edge_index(eix, ("person", "knows", "person"), "coo") + + feature_store = TensorDictFeatureStore() + feature_store["person", "feat"] = torch.randint(128, (6 * world_size, 12)) + feature_store[("person", "knows", "person"), "bias"] = torch.concat( + [torch.tensor([0, 1, 1], dtype=torch.float32) for _ in range(world_size)] + ) + + loader = NeighborLoader( + (feature_store, graph_store), + [1], + input_nodes=torch.arange( + 3 * rank, 3 * (rank + 1), dtype=torch.int64, device="cuda" + ), + batch_size=3, + weight_attr="bias", + ) + + out = list(iter(loader)) + assert len(out) == 1 + out = out[0] + + assert ( + out.edge_index.cpu() + == torch.tensor( + [ + [3, 4], + [1, 2], + ] + ) + ).all() + + cugraph_comms_shutdown() + + +@pytest.mark.skip(reason="deleteme") +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.mg +def test_neighbor_loader_biased_mg(): + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_neighbor_loader_biased_mg, + args=( + uid, + world_size, + ), + nprocs=world_size, + ) + + +def run_test_link_neighbor_loader_basic_mg( + rank, + uid, + world_size, + num_nodes: int, + num_edges: int, + select_edges: int, + batch_size: int, + num_neighbors: int, + depth: int, +): + init_pytorch_worker(rank, world_size, uid) + + graph_store = GraphStore(is_multi_gpu=True) + feature_store = TensorDictFeatureStore() + + eix = torch.randperm(num_edges)[:select_edges] + graph_store[("n", "e", 
"n"), "coo"] = torch.stack( + [ + torch.randint(0, num_nodes, (num_edges,)), + torch.randint(0, num_nodes, (num_edges,)), + ] + ) + + elx = graph_store[("n", "e", "n"), "coo"][:, eix] + loader = LinkNeighborLoader( + (feature_store, graph_store), + num_neighbors=[num_neighbors] * depth, + edge_label_index=elx, + batch_size=batch_size, + shuffle=False, + ) + + elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) + for i, batch in enumerate(loader): + assert ( + batch.input_id.cpu() == torch.arange(i * batch_size, (i + 1) * batch_size) + ).all() + assert (elx[i] == batch.n_id[batch.edge_label_index.cpu()]).all() + + cugraph_comms_shutdown() + + +@pytest.mark.skip(reason="deleteme") +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.mg +@pytest.mark.parametrize("select_edges", [64, 128]) +@pytest.mark.parametrize("batch_size", [2, 4]) +@pytest.mark.parametrize("depth", [1, 3]) +def test_link_neighbor_loader_basic_mg(select_edges, batch_size, depth): + num_nodes = 25 + num_edges = 128 + num_neighbors = 2 + + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_link_neighbor_loader_basic_mg, + args=( + uid, + world_size, + num_nodes, + num_edges, + select_edges, + batch_size, + num_neighbors, + depth, + ), + nprocs=world_size, + ) + + +def run_test_link_neighbor_loader_uneven_mg(rank, uid, world_size, edge_index): + init_pytorch_worker(rank, world_size, uid) + + graph_store = GraphStore(is_multi_gpu=True) + feature_store = TensorDictFeatureStore() + + batch_size = 1 + graph_store[("n", "e", "n"), "coo"] = torch.tensor_split( + edge_index, world_size, dim=-1 + )[rank] + + elx = graph_store[("n", "e", "n"), "coo"] # select all edges on each worker + loader = LinkNeighborLoader( + (feature_store, graph_store), + num_neighbors=[2, 2, 2], + edge_label_index=elx, + batch_size=batch_size, + shuffle=False, + ) + + for i, batch in enumerate(loader): + assert ( + batch.input_id.cpu() == torch.arange(i * batch_size, (i + 1) * batch_size) + ).all() + + assert (elx[:, [i]] == batch.n_id[batch.edge_label_index.cpu()]).all() + + cugraph_comms_shutdown() + + +@pytest.mark.skip(reason="deleteme") +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.mg +def test_link_neighbor_loader_uneven_mg(): + edge_index = torch.tensor( + [ + [0, 1, 3, 4, 7], + [1, 0, 8, 9, 12], + ] + ) + + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_link_neighbor_loader_uneven_mg, + args=( + uid, + world_size, + edge_index, + ), + nprocs=world_size, + ) + + +def run_test_link_neighbor_loader_negative_sampling_basic_mg( + rank, world_size, uid, batch_size +): + num_edges = 62 * world_size + num_nodes = 19 * world_size + select_edges = 17 + + init_pytorch_worker(rank, world_size, uid) + + graph_store = GraphStore(is_multi_gpu=True) + feature_store = TensorDictFeatureStore() + + eix = torch.randperm(num_edges)[:select_edges] + graph_store[("n", "e", "n"), "coo"] = torch.stack( + [ + torch.randint(0, num_nodes, (num_edges,)), + torch.randint(0, num_nodes, (num_edges,)), + ] + ) + + elx = graph_store[("n", "e", "n"), "coo"][:, eix] + loader = LinkNeighborLoader( + (feature_store, graph_store), + num_neighbors=[3, 3, 3], + edge_label_index=elx, + batch_size=batch_size, + neg_sampling="binary", + shuffle=False, + ) + + elx = torch.tensor_split(elx, eix.numel() // batch_size, dim=1) + for i, batch 
in enumerate(loader): + assert batch.edge_label[0] == 1.0 + + +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.mg +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_link_neighbor_loader_negative_sampling_basic_mg(batch_size): + uid = cugraph_comms_create_unique_id() + world_size = torch.cuda.device_count() + + torch.multiprocessing.spawn( + run_test_link_neighbor_loader_negative_sampling_basic_mg, + args=( + world_size, + uid, + batch_size, + ), + nprocs=world_size, + ) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/pytest.ini b/python/cugraph-pyg/cugraph_pyg/tests/pytest.ini new file mode 100644 index 00000000000..7b0a9f29fb1 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/pytest.ini @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index b29c108e3f4..e157f36f8f6 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -20,19 +20,19 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] dependencies = [ - "cugraph==24.10.*,>=0.0.0a0", + "cugraph==24.12.*,>=0.0.0a0", "numba>=0.57", - "numpy>=1.23,<2.0a0", - "pylibcugraphops==24.10.*,>=0.0.0a0", + "numpy>=1.23,<3.0a0", + "pylibcugraphops==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] @@ -42,14 +42,14 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/" [project.optional-dependencies] test = [ "pandas", - "pylibwholegraph==24.10.*,>=0.0.0a0", + "pylibwholegraph==24.12.*,>=0.0.0a0", "pytest", "pytest-benchmark", "pytest-cov", "pytest-xdist", "scipy", "tensordict>=0.1.2", - "torch>=2.0,<2.2.0a0", + "torch>=2.3,<2.4.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools] diff --git a/python/cugraph-pyg/pytest.ini b/python/cugraph-pyg/pytest.ini index db99a54ae49..07c4ffa0958 100644 --- a/python/cugraph-pyg/pytest.ini +++ b/python/cugraph-pyg/pytest.ini @@ -17,6 +17,7 @@ addopts = --benchmark-max-time=0 --benchmark-min-rounds=1 --benchmark-columns="mean, rounds" + --tb=native ## do not run slow tests/benchmarks by default -m "not slow" diff --git a/python/cugraph-service/client/pyproject.toml b/python/cugraph-service/client/pyproject.toml index 75deea88e2e..ac5e6bad0d5 100644 --- a/python/cugraph-service/client/pyproject.toml +++ b/python/cugraph-service/client/pyproject.toml @@ -18,16 +18,16 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "thriftpy2!=0.5.0,!=0.5.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. 
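# The new multi-GPU tests earlier in this diff all share one harness: the
# parent process creates a cugraph-comms unique id, and
# torch.multiprocessing.spawn launches one worker per GPU. spawn() always
# prepends the worker index, so a function defined as fn(rank, *args) receives
# its rank automatically -- which is why the spawn calls above pass only the
# remaining arguments. A minimal, self-contained sketch of the pattern follows;
# the worker body and the stand-in uid are illustrative only, not the tests'
# actual logic, and at least one visible GPU is assumed.
import torch
import torch.multiprocessing as mp


def _worker(rank: int, uid: bytes, world_size: int) -> None:
    # A real worker would call cugraph_comms_init(rank, world_size, uid, ...)
    # here, run its shard of the test, then call cugraph_comms_shutdown().
    print(f"worker {rank}/{world_size} received a {len(uid)}-byte uid")


if __name__ == "__main__":
    world_size = torch.cuda.device_count()
    uid = b"\x00" * 16  # hypothetical stand-in for cugraph_comms_create_unique_id()
    mp.spawn(_worker, args=(uid, world_size), nprocs=world_size)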
classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.urls] diff --git a/python/cugraph-service/pytest.ini b/python/cugraph-service/pytest.ini index 6a0dd36ecec..f2ba9175f82 100644 --- a/python/cugraph-service/pytest.ini +++ b/python/cugraph-service/pytest.ini @@ -16,6 +16,7 @@ addopts = --benchmark-warmup=off --benchmark-max-time=0 --benchmark-min-rounds=1 --benchmark-columns="min, max, mean, rounds" + --tb=native ## for use with rapids-pytest-benchmark plugin #--benchmark-gpu-disable ## for use with pytest-cov plugin diff --git a/python/cugraph-service/server/pyproject.toml b/python/cugraph-service/server/pyproject.toml index 2ae40911821..f388fd4c126 100644 --- a/python/cugraph-service/server/pyproject.toml +++ b/python/cugraph-service/server/pyproject.toml @@ -18,27 +18,27 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ - "cudf==24.10.*,>=0.0.0a0", - "cugraph-service-client==24.10.*,>=0.0.0a0", - "cugraph==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", + "cugraph-service-client==24.12.*,>=0.0.0a0", + "cugraph==24.12.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", - "dask-cuda==24.10.*,>=0.0.0a0", - "dask-cudf==24.10.*,>=0.0.0a0", + "dask-cuda==24.12.*,>=0.0.0a0", + "dask-cudf==24.12.*,>=0.0.0a0", "numba>=0.57", - "numpy>=1.23,<2.0a0", - "rapids-dask-dependency==24.10.*,>=0.0.0a0", - "rmm==24.10.*,>=0.0.0a0", + "numpy>=1.23,<3.0a0", + "rapids-dask-dependency==24.12.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", "thriftpy2!=0.5.0,!=0.5.1", - "ucx-py==0.40.*,>=0.0.0a0", + "ucx-py==0.41.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.scripts] @@ -47,7 +47,7 @@ cugraph-service-server = "cugraph_service_server.__main__:main" [project.optional-dependencies] test = [ "networkx>=2.5.1", - "numpy>=1.23,<2.0a0", + "numpy>=1.23,<3.0a0", "pandas", "pytest", "pytest-benchmark", diff --git a/python/cugraph-service/tests/pytest.ini b/python/cugraph-service/tests/pytest.ini new file mode 100644 index 00000000000..7b0a9f29fb1 --- /dev/null +++ b/python/cugraph-service/tests/pytest.ini @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native diff --git a/python/cugraph-service/tests/test_e2e.py b/python/cugraph-service/tests/test_e2e.py index c9b3d24f20e..3079a2423c7 100644 --- a/python/cugraph-service/tests/test_e2e.py +++ b/python/cugraph-service/tests/test_e2e.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
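# The test_e2e.py and test_mg_e2e.py hunks that follow replace type equality
# checks with identity checks, the form flake8 E721 asks for. A class object
# is a singleton, so ``is`` compares against the one true type object and
# cannot be affected by an overridden __eq__ on a metaclass; isinstance() is
# the right tool when subclasses should also pass. A short, runnable
# illustration of the distinction:
assert type(1) is int  # identity against the single built-in int type object
assert type(True) is not int  # exact-type check: bool is a subclass of int
assert isinstance(True, int)  # isinstance() accepts subclasses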
@@ -307,8 +307,8 @@ def test_load_call_unload_extension(client, extension1): assert len(results) == 2 assert len(results[0]) == 33 assert len(results[1]) == 21 - assert type(results[0][0]) == int - assert type(results[1][0]) == float + assert type(results[0][0]) is int + assert type(results[1][0]) is float assert results[0][0] == 9 assert results[1][0] == 9.0 diff --git a/python/cugraph-service/tests/test_mg_e2e.py b/python/cugraph-service/tests/test_mg_e2e.py index 39c1195151d..5526593aee0 100644 --- a/python/cugraph-service/tests/test_mg_e2e.py +++ b/python/cugraph-service/tests/test_mg_e2e.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -413,8 +413,8 @@ def test_call_extension_result_on_device( assert len(results) == 2 assert len(results[0]) == array1_len assert len(results[1]) == array2_len - assert type(results[0][0]) == int - assert type(results[1][0]) == float + assert type(results[0][0]) is int + assert type(results[1][0]) is float assert results[0][0] == 9 assert results[1][0] == 9.0 else: diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index dfccf02d042..ca38b5551c9 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -33,6 +33,7 @@ option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before de OFF ) option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) +option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) if(NOT USE_CUGRAPH_OPS) message(STATUS "Disabling libcugraph functions that reference cugraph-ops") @@ -49,18 +50,39 @@ endif() include(rapids-cython-core) if(NOT cugraph_FOUND) + find_package(CUDAToolkit REQUIRED) + set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) set(CUDA_STATIC_RUNTIME ON) + set(CUDA_STATIC_MATH_LIBRARIES ON) set(USE_RAFT_STATIC ON) set(CUGRAPH_COMPILE_RAFT_LIB ON) set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) set(ALLOW_CLONE_CUGRAPH_OPS ON) + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) + set(CUDA_STATIC_MATH_LIBRARIES OFF) + elseif(USE_CUDA_MATH_WHEELS) + message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") + endif() + add_subdirectory(../../cpp cugraph-cpp EXCLUDE_FROM_ALL) + if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) + set(rpaths + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/cufft/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" + ) + set_property(TARGET cugraph PROPERTY INSTALL_RPATH ${rpaths} APPEND) + endif() + set(cython_lib_dir cugraph) install(TARGETS cugraph DESTINATION ${cython_lib_dir}) endif() diff --git a/python/cugraph/cugraph/gnn/__init__.py b/python/cugraph/cugraph/gnn/__init__.py index b6c8e1981d0..5845f70ef7c 100644 --- a/python/cugraph/cugraph/gnn/__init__.py +++ b/python/cugraph/cugraph/gnn/__init__.py @@ -13,11 +13,13 @@ from .feature_storage.feat_storage import FeatureStore from .data_loading.bulk_sampler import BulkSampler -from .data_loading.dist_sampler import ( +from .data_loading import ( DistSampler, DistSampleWriter, DistSampleReader, + NeighborSampler, UniformNeighborSampler, + BiasedNeighborSampler, ) from .comms.cugraph_nccl_comms import ( cugraph_comms_init, diff --git 
a/python/cugraph/cugraph/gnn/data_loading/__init__.py b/python/cugraph/cugraph/gnn/data_loading/__init__.py index 98c547a0083..25f58be88aa 100644 --- a/python/cugraph/cugraph/gnn/data_loading/__init__.py +++ b/python/cugraph/cugraph/gnn/data_loading/__init__.py @@ -14,7 +14,26 @@ from cugraph.gnn.data_loading.bulk_sampler import BulkSampler from cugraph.gnn.data_loading.dist_sampler import ( DistSampler, + NeighborSampler, +) +from cugraph.gnn.data_loading.dist_io import ( DistSampleWriter, DistSampleReader, - UniformNeighborSampler, + BufferedSampleReader, ) + + +def UniformNeighborSampler(*args, **kwargs): + return NeighborSampler( + *args, + **kwargs, + biased=False, + ) + + +def BiasedNeighborSampler(*args, **kwargs): + return NeighborSampler( + *args, + **kwargs, + biased=True, + ) diff --git a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py index 6abbd82647b..222fb49a836 100644 --- a/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py +++ b/python/cugraph/cugraph/gnn/data_loading/bulk_sampler_io.py @@ -33,10 +33,12 @@ def create_df_from_disjoint_series(series_list: List[cudf.Series]): def create_df_from_disjoint_arrays(array_dict: Dict[str, cupy.array]): + series_dict = {} for k in list(array_dict.keys()): - array_dict[k] = cudf.Series(array_dict[k], name=k) + if array_dict[k] is not None: + series_dict[k] = cudf.Series(array_dict[k], name=k) - return create_df_from_disjoint_series(list(array_dict.values())) + return create_df_from_disjoint_series(list(series_dict.values())) def _write_samples_to_parquet_csr( diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_io/__init__.py b/python/cugraph/cugraph/gnn/data_loading/dist_io/__init__.py new file mode 100644 index 00000000000..29bb5489be2 --- /dev/null +++ b/python/cugraph/cugraph/gnn/data_loading/dist_io/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .reader import BufferedSampleReader, DistSampleReader +from .writer import DistSampleWriter diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_io/reader.py b/python/cugraph/cugraph/gnn/data_loading/dist_io/reader.py new file mode 100644 index 00000000000..69f909e7a8d --- /dev/null +++ b/python/cugraph/cugraph/gnn/data_loading/dist_io/reader.py @@ -0,0 +1,144 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
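# The new dist_io module splits sample I/O out of dist_sampler.py. The read
# path it exposes is sketched below: DistSampleReader (defined in this file)
# walks a directory of parquet partitions written by DistSampleWriter and
# yields one (tensors, start_batch, end_batch) tuple per partition, with each
# non-empty column materialized as a CUDA tensor. A minimal sketch, assuming
# a hypothetical "/tmp/samples" directory already populated by a single-rank
# DistSampleWriter:
from cugraph.gnn.data_loading.dist_io import DistSampleReader

for tensors, start_batch, end_batch in DistSampleReader(
    "/tmp/samples", format="parquet", rank=None
):
    # Typical keys: "majors"/"minors" (COO) or "major_offsets" (CSR), plus
    # "map", "label_hop_offsets", "renumber_map_offsets", and so on.
    print(start_batch, end_batch, sorted(tensors.keys()))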
+ + +import os +import re + +import cudf + +from typing import Callable, Iterator, Tuple, Dict, Optional + +from cugraph.utilities.utils import import_optional, MissingModule + +# Prevent PyTorch from being imported and causing an OOM error +torch = MissingModule("torch") + + +class DistSampleReader: + def __init__( + self, + directory: str, + *, + format: str = "parquet", + rank: Optional[int] = None, + filelist=None, + ): + torch = import_optional("torch") + + self.__format = format + self.__directory = directory + + if format != "parquet": + raise ValueError("Invalid format (currently supported: 'parquet')") + + if filelist is None: + files = os.listdir(directory) + ex = re.compile(r"batch\=([0-9]+)\.([0-9]+)\-([0-9]+)\.([0-9]+)\.parquet") + filematch = [ex.match(f) for f in files] + filematch = [f for f in filematch if f] + + if rank is not None: + filematch = [f for f in filematch if int(f[1]) == rank] + + batch_count = sum([int(f[4]) - int(f[2]) + 1 for f in filematch]) + filematch = sorted(filematch, key=lambda f: int(f[2]), reverse=True) + + self.__files = filematch + else: + self.__files = list(filelist) + + if rank is None: + self.__batch_count = batch_count + else: + # TODO maybe remove this in favor of warning users that they are + # probably going to cause a hang, instead of attempting to resolve + # the hang for them by dropping batches. + batch_count = torch.tensor([batch_count], device="cuda") + torch.distributed.all_reduce(batch_count, torch.distributed.ReduceOp.MIN) + self.__batch_count = int(batch_count) + + def __iter__(self): + return self + + def __next__(self) -> Tuple[Dict[str, "torch.Tensor"], int, int]: + torch = import_optional("torch") + + if len(self.__files) > 0: + f = self.__files.pop() + fname = f[0] + start_inclusive = int(f[2]) + end_inclusive = int(f[4]) + + if (end_inclusive - start_inclusive + 1) > self.__batch_count: + end_inclusive = start_inclusive + self.__batch_count - 1 + self.__batch_count = 0 + else: + self.__batch_count -= end_inclusive - start_inclusive + 1 + + df = cudf.read_parquet(os.path.join(self.__directory, fname)) + tensors = {} + for col in list(df.columns): + s = df[col].dropna() + if len(s) > 0: + tensors[col] = torch.as_tensor(s, device="cuda") + df.drop(col, axis=1, inplace=True) + + return tensors, start_inclusive, end_inclusive + + raise StopIteration + + +class BufferedSampleReader: + def __init__( + self, + nodes_call_groups: list["torch.Tensor"], + sample_fn: Callable[..., Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]], + *args, + **kwargs, + ): + self.__sample_args = args + self.__sample_kwargs = kwargs + + self.__nodes_call_groups = iter(nodes_call_groups) + self.__sample_fn = sample_fn + self.__current_call_id = 0 + self.__current_reader = None + + def __next__(self) -> Tuple[Dict[str, "torch.Tensor"], int, int]: + new_reader = False + + if self.__current_reader is None: + new_reader = True + else: + try: + out = next(self.__current_reader) + except StopIteration: + new_reader = True + + if new_reader: + # Will trigger StopIteration if there are no more call groups + self.__current_reader = self.__sample_fn( + self.__current_call_id, + next(self.__nodes_call_groups), + *self.__sample_args, + **self.__sample_kwargs, + ) + + self.__current_call_id += 1 + out = next(self.__current_reader) + + return out + + def __iter__(self) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: + return self diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_io/writer.py 
b/python/cugraph/cugraph/gnn/data_loading/dist_io/writer.py new file mode 100644 index 00000000000..f8ad4719a76 --- /dev/null +++ b/python/cugraph/cugraph/gnn/data_loading/dist_io/writer.py @@ -0,0 +1,321 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from math import ceil + + +import cupy + +from cugraph.utilities.utils import MissingModule +from cugraph.gnn.data_loading.dist_io import DistSampleReader + +from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays + +from typing import Iterator, Tuple, Dict + +torch = MissingModule("torch") + + +class DistSampleWriter: + def __init__( + self, + directory: str, + *, + batches_per_partition: int = 256, + format: str = "parquet", + ): + """ + Parameters + ---------- + directory: str (required) + The directory where samples will be written. This + writer can only write to disk. + batches_per_partition: int (optional, default=256) + The number of batches to write in a single file. + format: str (optional, default='parquet') + The file format of the output files containing the + sampled minibatches. Currently, only parquet format + is supported. + """ + if format != "parquet": + raise ValueError("Invalid format (currently supported: 'parquet')") + + self.__format = format + self.__directory = directory + self.__batches_per_partition = batches_per_partition + + @property + def _format(self): + return self.__format + + @property + def _directory(self): + return self.__directory + + @property + def _batches_per_partition(self): + return self.__batches_per_partition + + def get_reader( + self, rank: int + ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: + """ + Returns an iterator over sampled data. + """ + + # currently only disk reading is supported + return DistSampleReader(self._directory, format=self._format, rank=rank) + + def __write_minibatches_coo(self, minibatch_dict): + has_edge_ids = minibatch_dict["edge_id"] is not None + has_edge_types = minibatch_dict["edge_type"] is not None + has_weights = minibatch_dict["weight"] is not None + + if minibatch_dict["renumber_map"] is None: + raise ValueError( + "Distributed sampling without renumbering is not supported" + ) + + # Quit if there are no batches to write. 
+ if len(minibatch_dict["batch_id"]) == 0: + return + + fanout_length = (len(minibatch_dict["label_hop_offsets"]) - 1) // len( + minibatch_dict["batch_id"] + ) + + for p in range( + 0, int(ceil(len(minibatch_dict["batch_id"]) / self.__batches_per_partition)) + ): + partition_start = p * (self.__batches_per_partition) + partition_end = (p + 1) * (self.__batches_per_partition) + + label_hop_offsets_array_p = minibatch_dict["label_hop_offsets"][ + partition_start * fanout_length : partition_end * fanout_length + 1 + ] + + batch_id_array_p = minibatch_dict["batch_id"][partition_start:partition_end] + start_batch_id = batch_id_array_p[0] + + input_offsets_p = minibatch_dict["input_offsets"][ + partition_start : (partition_end + 1) + ] + input_index_p = minibatch_dict["input_index"][ + input_offsets_p[0] : input_offsets_p[-1] + ] + edge_inverse_p = ( + minibatch_dict["edge_inverse"][ + (input_offsets_p[0] * 2) : (input_offsets_p[-1] * 2) + ] + if "edge_inverse" in minibatch_dict + else None + ) + + start_ix, end_ix = label_hop_offsets_array_p[[0, -1]] + majors_array_p = minibatch_dict["majors"][start_ix:end_ix] + minors_array_p = minibatch_dict["minors"][start_ix:end_ix] + edge_id_array_p = ( + minibatch_dict["edge_id"][start_ix:end_ix] + if has_edge_ids + else cupy.array([], dtype="int64") + ) + edge_type_array_p = ( + minibatch_dict["edge_type"][start_ix:end_ix] + if has_edge_types + else cupy.array([], dtype="int32") + ) + weight_array_p = ( + minibatch_dict["weight"][start_ix:end_ix] + if has_weights + else cupy.array([], dtype="float32") + ) + + # create the renumber map offsets + renumber_map_offsets_array_p = minibatch_dict["renumber_map_offsets"][ + partition_start : partition_end + 1 + ] + + renumber_map_start_ix, renumber_map_end_ix = renumber_map_offsets_array_p[ + [0, -1] + ] + + renumber_map_array_p = minibatch_dict["renumber_map"][ + renumber_map_start_ix:renumber_map_end_ix + ] + + results_dataframe_p = create_df_from_disjoint_arrays( + { + "majors": majors_array_p, + "minors": minors_array_p, + "map": renumber_map_array_p, + "label_hop_offsets": label_hop_offsets_array_p, + "weight": weight_array_p, + "edge_id": edge_id_array_p, + "edge_type": edge_type_array_p, + "renumber_map_offsets": renumber_map_offsets_array_p, + "input_index": input_index_p, + "input_offsets": input_offsets_p, + "edge_inverse": edge_inverse_p, + } + ) + + end_batch_id = start_batch_id + len(batch_id_array_p) - 1 + rank = minibatch_dict["rank"] if "rank" in minibatch_dict else 0 + + full_output_path = os.path.join( + self.__directory, + f"batch={rank:05d}.{start_batch_id:08d}-" + f"{rank:05d}.{end_batch_id:08d}.parquet", + ) + + results_dataframe_p.to_parquet( + full_output_path, + compression=None, + index=False, + force_nullable_schema=True, + ) + + def __write_minibatches_csr(self, minibatch_dict): + has_edge_ids = minibatch_dict["edge_id"] is not None + has_edge_types = minibatch_dict["edge_type"] is not None + has_weights = minibatch_dict["weight"] is not None + + if minibatch_dict["renumber_map"] is None: + raise ValueError( + "Distributed sampling without renumbering is not supported" + ) + + # Quit if there are no batches to write. 
+ if len(minibatch_dict["batch_id"]) == 0: + return + + fanout_length = (len(minibatch_dict["label_hop_offsets"]) - 1) // len( + minibatch_dict["batch_id"] + ) + + for p in range( + 0, int(ceil(len(minibatch_dict["batch_id"]) / self.__batches_per_partition)) + ): + partition_start = p * (self.__batches_per_partition) + partition_end = (p + 1) * (self.__batches_per_partition) + + label_hop_offsets_array_p = minibatch_dict["label_hop_offsets"][ + partition_start * fanout_length : partition_end * fanout_length + 1 + ] + + batch_id_array_p = minibatch_dict["batch_id"][partition_start:partition_end] + start_batch_id = batch_id_array_p[0] + + input_offsets_p = minibatch_dict["input_offsets"][ + partition_start : (partition_end + 1) + ] + input_index_p = minibatch_dict["input_index"][ + input_offsets_p[0] : input_offsets_p[-1] + ] + edge_inverse_p = ( + minibatch_dict["edge_inverse"][ + (input_offsets_p[0] * 2) : (input_offsets_p[-1] * 2) + ] + if "edge_inverse" in minibatch_dict + else None + ) + + # major offsets and minors + ( + major_offsets_start_incl, + major_offsets_end_incl, + ) = label_hop_offsets_array_p[[0, -1]] + + start_ix, end_ix = minibatch_dict["major_offsets"][ + [major_offsets_start_incl, major_offsets_end_incl] + ] + + major_offsets_array_p = minibatch_dict["major_offsets"][ + major_offsets_start_incl : major_offsets_end_incl + 1 + ] + + minors_array_p = minibatch_dict["minors"][start_ix:end_ix] + edge_id_array_p = ( + minibatch_dict["edge_id"][start_ix:end_ix] + if has_edge_ids + else cupy.array([], dtype="int64") + ) + edge_type_array_p = ( + minibatch_dict["edge_type"][start_ix:end_ix] + if has_edge_types + else cupy.array([], dtype="int32") + ) + weight_array_p = ( + minibatch_dict["weight"][start_ix:end_ix] + if has_weights + else cupy.array([], dtype="float32") + ) + + # create the renumber map offsets + renumber_map_offsets_array_p = minibatch_dict["renumber_map_offsets"][ + partition_start : partition_end + 1 + ] + + renumber_map_start_ix, renumber_map_end_ix = renumber_map_offsets_array_p[ + [0, -1] + ] + + renumber_map_array_p = minibatch_dict["renumber_map"][ + renumber_map_start_ix:renumber_map_end_ix + ] + + results_dataframe_p = create_df_from_disjoint_arrays( + { + "major_offsets": major_offsets_array_p, + "minors": minors_array_p, + "map": renumber_map_array_p, + "label_hop_offsets": label_hop_offsets_array_p, + "weight": weight_array_p, + "edge_id": edge_id_array_p, + "edge_type": edge_type_array_p, + "renumber_map_offsets": renumber_map_offsets_array_p, + "input_index": input_index_p, + "input_offsets": input_offsets_p, + "edge_inverse": edge_inverse_p, + } + ) + + end_batch_id = start_batch_id + len(batch_id_array_p) - 1 + rank = minibatch_dict["rank"] if "rank" in minibatch_dict else 0 + + full_output_path = os.path.join( + self.__directory, + f"batch={rank:05d}.{start_batch_id:08d}-" + f"{rank:05d}.{end_batch_id:08d}.parquet", + ) + + results_dataframe_p.to_parquet( + full_output_path, + compression=None, + index=False, + force_nullable_schema=True, + ) + + def write_minibatches(self, minibatch_dict): + if (minibatch_dict["majors"] is not None) and ( + minibatch_dict["minors"] is not None + ): + self.__write_minibatches_coo(minibatch_dict) + elif (minibatch_dict["major_offsets"] is not None) and ( + minibatch_dict["minors"] is not None + ): + self.__write_minibatches_csr(minibatch_dict) + else: + raise ValueError("invalid columns") diff --git a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py 
index a5a84362a07..0ff38741e1a 100644 --- a/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py +++ b/python/cugraph/cugraph/gnn/data_loading/dist_sampler.py @@ -11,8 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import re import warnings from math import ceil from functools import reduce @@ -27,348 +25,19 @@ from cugraph.utilities.utils import import_optional, MissingModule from cugraph.gnn.comms import cugraph_comms_get_raft_handle -from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays + +from cugraph.gnn.data_loading.dist_io import BufferedSampleReader +from cugraph.gnn.data_loading.dist_io import DistSampleWriter torch = MissingModule("torch") TensorType = Union["torch.Tensor", cupy.ndarray, cudf.Series] -class DistSampleReader: - def __init__( - self, - directory: str, - *, - format: str = "parquet", - rank: Optional[int] = None, - filelist=None, - ): - torch = import_optional("torch") - - self.__format = format - self.__directory = directory - - if format != "parquet": - raise ValueError("Invalid format (currently supported: 'parquet')") - - if filelist is None: - files = os.listdir(directory) - ex = re.compile(r"batch\=([0-9]+)\.([0-9]+)\-([0-9]+)\.([0-9]+)\.parquet") - filematch = [ex.match(f) for f in files] - filematch = [f for f in filematch if f] - - if rank is not None: - filematch = [f for f in filematch if int(f[1]) == rank] - - batch_count = sum([int(f[4]) - int(f[2]) + 1 for f in filematch]) - filematch = sorted(filematch, key=lambda f: int(f[2]), reverse=True) - - self.__files = filematch - else: - self.__files = list(filelist) - - if rank is None: - self.__batch_count = batch_count - else: - batch_count = torch.tensor([batch_count], device="cuda") - torch.distributed.all_reduce(batch_count, torch.distributed.ReduceOp.MIN) - self.__batch_count = int(batch_count) - - def __iter__(self): - return self - - def __next__(self): - torch = import_optional("torch") - - if len(self.__files) > 0: - f = self.__files.pop() - fname = f[0] - start_inclusive = int(f[2]) - end_inclusive = int(f[4]) - - if (end_inclusive - start_inclusive + 1) > self.__batch_count: - end_inclusive = start_inclusive + self.__batch_count - 1 - self.__batch_count = 0 - else: - self.__batch_count -= end_inclusive - start_inclusive + 1 - - df = cudf.read_parquet(os.path.join(self.__directory, fname)) - tensors = {} - for col in list(df.columns): - s = df[col].dropna() - if len(s) > 0: - tensors[col] = torch.as_tensor(s, device="cuda") - df.drop(col, axis=1, inplace=True) - - return tensors, start_inclusive, end_inclusive - - raise StopIteration - - -class DistSampleWriter: - def __init__( - self, - directory: str, - *, - batches_per_partition: int = 256, - format: str = "parquet", - ): - """ - Parameters - ---------- - directory: str (required) - The directory where samples will be written. This - writer can only write to disk. - batches_per_partition: int (optional, default=256) - The number of batches to write in a single file. - format: str (optional, default='parquet') - The file format of the output files containing the - sampled minibatches. Currently, only parquet format - is supported. 
- """ - if format != "parquet": - raise ValueError("Invalid format (currently supported: 'parquet')") - - self.__format = format - self.__directory = directory - self.__batches_per_partition = batches_per_partition - - @property - def _format(self): - return self.__format - - @property - def _directory(self): - return self.__directory - - @property - def _batches_per_partition(self): - return self.__batches_per_partition - - def get_reader( - self, rank: int - ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: - """ - Returns an iterator over sampled data. - """ - - # currently only disk reading is supported - return DistSampleReader(self._directory, format=self._format, rank=rank) - - def __write_minibatches_coo(self, minibatch_dict): - has_edge_ids = minibatch_dict["edge_id"] is not None - has_edge_types = minibatch_dict["edge_type"] is not None - has_weights = minibatch_dict["weight"] is not None - - if minibatch_dict["renumber_map"] is None: - raise ValueError( - "Distributed sampling without renumbering is not supported" - ) - - # Quit if there are no batches to write. - if len(minibatch_dict["batch_id"]) == 0: - return - - fanout_length = (len(minibatch_dict["label_hop_offsets"]) - 1) // len( - minibatch_dict["batch_id"] - ) - rank_batch_offset = minibatch_dict["batch_id"][0] - - for p in range( - 0, int(ceil(len(minibatch_dict["batch_id"]) / self.__batches_per_partition)) - ): - partition_start = p * (self.__batches_per_partition) - partition_end = (p + 1) * (self.__batches_per_partition) - - label_hop_offsets_array_p = minibatch_dict["label_hop_offsets"][ - partition_start * fanout_length : partition_end * fanout_length + 1 - ] - - batch_id_array_p = minibatch_dict["batch_id"][partition_start:partition_end] - start_batch_id = batch_id_array_p[0] - rank_batch_offset - - start_ix, end_ix = label_hop_offsets_array_p[[0, -1]] - majors_array_p = minibatch_dict["majors"][start_ix:end_ix] - minors_array_p = minibatch_dict["minors"][start_ix:end_ix] - edge_id_array_p = ( - minibatch_dict["edge_id"][start_ix:end_ix] - if has_edge_ids - else cupy.array([], dtype="int64") - ) - edge_type_array_p = ( - minibatch_dict["edge_type"][start_ix:end_ix] - if has_edge_types - else cupy.array([], dtype="int32") - ) - weight_array_p = ( - minibatch_dict["weight"][start_ix:end_ix] - if has_weights - else cupy.array([], dtype="float32") - ) - - # create the renumber map offsets - renumber_map_offsets_array_p = minibatch_dict["renumber_map_offsets"][ - partition_start : partition_end + 1 - ] - - renumber_map_start_ix, renumber_map_end_ix = renumber_map_offsets_array_p[ - [0, -1] - ] - - renumber_map_array_p = minibatch_dict["renumber_map"][ - renumber_map_start_ix:renumber_map_end_ix - ] - - results_dataframe_p = create_df_from_disjoint_arrays( - { - "majors": majors_array_p, - "minors": minors_array_p, - "map": renumber_map_array_p, - "label_hop_offsets": label_hop_offsets_array_p, - "weight": weight_array_p, - "edge_id": edge_id_array_p, - "edge_type": edge_type_array_p, - "renumber_map_offsets": renumber_map_offsets_array_p, - } - ) - - end_batch_id = start_batch_id + len(batch_id_array_p) - 1 - rank = minibatch_dict["rank"] if "rank" in minibatch_dict else 0 - - full_output_path = os.path.join( - self.__directory, - f"batch={rank:05d}.{start_batch_id:08d}-" - f"{rank:05d}.{end_batch_id:08d}.parquet", - ) - - results_dataframe_p.to_parquet( - full_output_path, - compression=None, - index=False, - force_nullable_schema=True, - ) - - def __write_minibatches_csr(self, minibatch_dict): - 
has_edge_ids = minibatch_dict["edge_id"] is not None - has_edge_types = minibatch_dict["edge_type"] is not None - has_weights = minibatch_dict["weight"] is not None - - if minibatch_dict["renumber_map"] is None: - raise ValueError( - "Distributed sampling without renumbering is not supported" - ) - - # Quit if there are no batches to write. - if len(minibatch_dict["batch_id"]) == 0: - return - - fanout_length = (len(minibatch_dict["label_hop_offsets"]) - 1) // len( - minibatch_dict["batch_id"] - ) - - for p in range( - 0, int(ceil(len(minibatch_dict["batch_id"]) / self.__batches_per_partition)) - ): - partition_start = p * (self.__batches_per_partition) - partition_end = (p + 1) * (self.__batches_per_partition) - - label_hop_offsets_array_p = minibatch_dict["label_hop_offsets"][ - partition_start * fanout_length : partition_end * fanout_length + 1 - ] - - batch_id_array_p = minibatch_dict["batch_id"][partition_start:partition_end] - start_batch_id = batch_id_array_p[0] - - # major offsets and minors - ( - major_offsets_start_incl, - major_offsets_end_incl, - ) = label_hop_offsets_array_p[[0, -1]] - - start_ix, end_ix = minibatch_dict["major_offsets"][ - [major_offsets_start_incl, major_offsets_end_incl] - ] - - major_offsets_array_p = minibatch_dict["major_offsets"][ - major_offsets_start_incl : major_offsets_end_incl + 1 - ] - - minors_array_p = minibatch_dict["minors"][start_ix:end_ix] - edge_id_array_p = ( - minibatch_dict["edge_id"][start_ix:end_ix] - if has_edge_ids - else cupy.array([], dtype="int64") - ) - edge_type_array_p = ( - minibatch_dict["edge_type"][start_ix:end_ix] - if has_edge_types - else cupy.array([], dtype="int32") - ) - weight_array_p = ( - minibatch_dict["weight"][start_ix:end_ix] - if has_weights - else cupy.array([], dtype="float32") - ) - - # create the renumber map offsets - renumber_map_offsets_array_p = minibatch_dict["renumber_map_offsets"][ - partition_start : partition_end + 1 - ] - - renumber_map_start_ix, renumber_map_end_ix = renumber_map_offsets_array_p[ - [0, -1] - ] - - renumber_map_array_p = minibatch_dict["renumber_map"][ - renumber_map_start_ix:renumber_map_end_ix - ] - - results_dataframe_p = create_df_from_disjoint_arrays( - { - "major_offsets": major_offsets_array_p, - "minors": minors_array_p, - "map": renumber_map_array_p, - "label_hop_offsets": label_hop_offsets_array_p, - "weight": weight_array_p, - "edge_id": edge_id_array_p, - "edge_type": edge_type_array_p, - "renumber_map_offsets": renumber_map_offsets_array_p, - } - ) - - end_batch_id = start_batch_id + len(batch_id_array_p) - 1 - rank = minibatch_dict["rank"] if "rank" in minibatch_dict else 0 - - full_output_path = os.path.join( - self.__directory, - f"batch={rank:05d}.{start_batch_id:08d}-" - f"{rank:05d}.{end_batch_id:08d}.parquet", - ) - - results_dataframe_p.to_parquet( - full_output_path, - compression=None, - index=False, - force_nullable_schema=True, - ) - - def write_minibatches(self, minibatch_dict): - if (minibatch_dict["majors"] is not None) and ( - minibatch_dict["minors"] is not None - ): - self.__write_minibatches_coo(minibatch_dict) - elif (minibatch_dict["major_offsets"] is not None) and ( - minibatch_dict["minors"] is not None - ): - self.__write_minibatches_csr(minibatch_dict) - else: - raise ValueError("invalid columns") - - class DistSampler: def __init__( self, graph: Union[pylibcugraph.SGGraph, pylibcugraph.MGGraph], - writer: DistSampleWriter, + writer: Optional[DistSampleWriter], local_seeds_per_call: int, retain_original_seeds: bool = False, ): @@ -379,7 +48,8 
@@ def __init__( The pylibcugraph graph object that will be sampled. writer: DistSampleWriter (required) The writer responsible for writing samples to disk - or, in the future, device or host memory. + or, if None, samples will be written to memory + instead. local_seeds_per_call: int The number of seeds on this rank this sampler will process in a single sampling call. Batches will @@ -402,14 +72,6 @@ def __init__( self.__handle = None self.__retain_original_seeds = retain_original_seeds - def get_reader(self) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: - """ - Returns an iterator over sampled data. - """ - torch = import_optional("torch") - rank = torch.distributed.get_rank() if self.is_multi_gpu else None - return self.__writer.get_reader(rank) - def sample_batches( self, seeds: TensorType, @@ -564,6 +226,108 @@ def get_start_batch_offset( else: return 0, input_size_is_equal + def __sample_from_nodes_func( + self, + call_id: int, + current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor"], + batch_id_start: int, + batch_size: int, + batches_per_call: int, + random_state: int, + assume_equal_input_size: bool, + ) -> Union[None, Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]]: + torch = import_optional("torch") + + current_seeds, current_ix = current_seeds_and_ix + + current_batches = torch.arange( + batch_id_start + call_id * batches_per_call, + batch_id_start + + call_id * batches_per_call + + int(ceil(len(current_seeds) / batch_size)) + + 1, + device="cuda", + dtype=torch.int32, + ) + + current_batches = current_batches.repeat_interleave(batch_size)[ + : len(current_seeds) + ] + + # do qr division to get the number of batch_size batches and the + # size of the last batch + num_full, last_count = divmod(len(current_seeds), batch_size) + input_offsets = torch.concatenate( + [ + torch.tensor([0], device="cuda", dtype=torch.int64), + torch.full((num_full,), batch_size, device="cuda", dtype=torch.int64), + torch.tensor([last_count], device="cuda", dtype=torch.int64) + if last_count > 0 + else torch.tensor([], device="cuda", dtype=torch.int64), + ] + ).cumsum(-1) + + minibatch_dict = self.sample_batches( + seeds=current_seeds, + batch_ids=current_batches, + random_state=random_state, + assume_equal_input_size=assume_equal_input_size, + ) + minibatch_dict["input_index"] = current_ix.cuda() + minibatch_dict["input_offsets"] = input_offsets + + if self.__writer is None: + # rename renumber_map -> map to match unbuffered format + minibatch_dict["map"] = minibatch_dict["renumber_map"] + del minibatch_dict["renumber_map"] + minibatch_dict = { + k: torch.as_tensor(v, device="cuda") + for k, v in minibatch_dict.items() + if v is not None + } + + return iter([(minibatch_dict, current_batches[0], current_batches[-1])]) + else: + self.__writer.write_minibatches(minibatch_dict) + return None + + def __get_call_groups( + self, + seeds: TensorType, + input_id: TensorType, + seeds_per_call: int, + assume_equal_input_size: bool = False, + ): + torch = import_optional("torch") + + # Split the input seeds into call groups. Each call group + # corresponds to one sampling call. A call group contains + # many batches. + seeds_call_groups = torch.split(seeds, seeds_per_call, dim=-1) + index_call_groups = torch.split(input_id, seeds_per_call, dim=-1) + + # Need to add empties to the list of call groups to handle the case + # where not all ranks have the same number of call groups. This + # prevents a hang since we need all ranks to make the same number + # of calls.
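# (Editor's illustration, not part of this change.) Assuming two ranks where
# rank 0 splits its seeds into 3 call groups and rank 1 into only 2, the MAX
# all-reduce below yields 3, so rank 1 pads its lists with one empty tensor:
#   rank 0: [g0, g1, g2]    -> 3 sampling calls
#   rank 1: [g0, g1, empty] -> 3 sampling calls
# Every rank then enters the same number of collective sampling calls, which
# is what prevents the hang described above.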
+ if not assume_equal_input_size: + num_call_groups = torch.tensor( + [len(seeds_call_groups)], device="cuda", dtype=torch.int32 + ) + torch.distributed.all_reduce( + num_call_groups, op=torch.distributed.ReduceOp.MAX + ) + seeds_call_groups = list(seeds_call_groups) + ( + [torch.tensor([], dtype=seeds.dtype, device="cuda")] + * (int(num_call_groups) - len(seeds_call_groups)) + ) + index_call_groups = list(index_call_groups) + ( + [torch.tensor([], dtype=torch.int64, device=input_id.device)] + * (int(num_call_groups) - len(index_call_groups)) + ) + + return seeds_call_groups, index_call_groups + def sample_from_nodes( self, nodes: TensorType, @@ -571,7 +335,8 @@ def sample_from_nodes( batch_size: int = 16, random_state: int = 62, assume_equal_input_size: bool = False, - ): + input_id: Optional[TensorType] = None, + ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: """ Performs node-based sampling. Accepts a list of seed nodes, and batch size. Splits the seed list into batches, then divides the batches into call groups @@ -587,61 +352,301 @@ def sample_from_nodes( The size of each batch. random_state: int The random seed to use for sampling. + assume_equal_input_size: bool + Whether the inputs across workers should be assumed to be equal in + dimension. Skips some checks if True. + input_id: Optional[TensorType] + Input ids corresponding to the original batch tensor, if it + was permuted prior to calling this function. If present, + will be saved with the samples. """ torch = import_optional("torch") nodes = torch.as_tensor(nodes, device="cuda") + num_seeds = nodes.numel() batches_per_call = self._local_seeds_per_call // batch_size actual_seeds_per_call = batches_per_call * batch_size - # Split the input seeds into call groups. Each call group - # corresponds to one sampling call. A call group contains - # many batches. - num_seeds = len(nodes) - nodes_call_groups = torch.split(nodes, actual_seeds_per_call) + if input_id is None: + input_id = torch.arange(num_seeds, dtype=torch.int64, device="cpu") local_num_batches = int(ceil(num_seeds / batch_size)) batch_id_start, input_size_is_equal = self.get_start_batch_offset( local_num_batches, assume_equal_input_size=assume_equal_input_size ) - # Need to add empties to the list of call groups to handle the case - # where not all nodes have the same number of call groups. This - # prevents a hang since we need all ranks to make the same number - # of calls. 
- if not input_size_is_equal: - num_call_groups = torch.tensor( - [len(nodes_call_groups)], device="cuda", dtype=torch.int32 - ) - torch.distributed.all_reduce( - num_call_groups, op=torch.distributed.ReduceOp.MAX + nodes_call_groups, index_call_groups = self.__get_call_groups( + nodes, + input_id, + actual_seeds_per_call, + assume_equal_input_size=input_size_is_equal, + ) + + sample_args = ( + batch_id_start, + batch_size, + batches_per_call, + random_state, + input_size_is_equal, + ) + + if self.__writer is None: + # Buffered sampling + return BufferedSampleReader( + zip(nodes_call_groups, index_call_groups), + self.__sample_from_nodes_func, + *sample_args, ) - nodes_call_groups = list(nodes_call_groups) + ( - [torch.tensor([], dtype=nodes.dtype, device="cuda")] - * (int(num_call_groups) - len(nodes_call_groups)) + else: + # Unbuffered sampling + for i, current_seeds_and_ix in enumerate( + zip(nodes_call_groups, index_call_groups) + ): + self.__sample_from_nodes_func( + i, + current_seeds_and_ix, + *sample_args, + ) + + # Return a reader that points to the stored samples + rank = torch.distributed.get_rank() if self.is_multi_gpu else None + return self.__writer.get_reader(rank) + + def __sample_from_edges_func( + self, + call_id: int, + current_seeds_and_ix: Tuple["torch.Tensor", "torch.Tensor"], + batch_id_start: int, + batch_size: int, + batches_per_call: int, + random_state: int, + assume_equal_input_size: bool, + ) -> Union[None, Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]]: + torch = import_optional("torch") + + current_seeds, current_ix = current_seeds_and_ix + num_seed_edges = current_ix.numel() + + # The index gets stored as-is regardless of what makes it into + # the final batch and in what order. + # do qr division to get the number of batch_size batches and the + # size of the last batch + num_whole_batches, last_count = divmod(num_seed_edges, batch_size) + input_offsets = torch.concatenate( + [ + torch.tensor([0], device="cuda", dtype=torch.int64), + torch.full( + (num_whole_batches,), batch_size, device="cuda", dtype=torch.int64 + ), + torch.tensor([last_count], device="cuda", dtype=torch.int64) + if last_count > 0 + else torch.tensor([], device="cuda", dtype=torch.int64), + ] + ).cumsum(-1) + + current_seeds, leftover_seeds = ( + current_seeds[:, : (batch_size * num_whole_batches)], + current_seeds[:, (batch_size * num_whole_batches) :], + ) + + # For input edges, we need to translate this into unique vertices + # for each batch. + # We start by reorganizing the seed and index tensors so we can + # determine the unique vertices. This results in the expected + # src-to-dst concatenation for each batch + current_seeds = torch.concat( + [ + current_seeds[0].reshape((-1, batch_size)), + current_seeds[1].reshape((-1, batch_size)), + ], + axis=-1, + ) + + # The returned unique values must be sorted or else the inverse won't line up + # In the future this may be a good target for a C++ function + # Each element is a tuple of (unique, index, inverse) + # The seeds must be presorted with a stable sort prior to calling + # unique_consecutive in order to support negative sampling. This is + # because if we put positive edges after negative ones, then we may + # inadvertently turn a true positive into a false negative. 
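# (Editor's worked example, not part of this change.) For one batch of seeds
# t = [5, 3, 5, 7]: the stable sort gives values [3, 5, 5, 7] with argsort
# i = [1, 0, 2, 3]; unique_consecutive then yields uniques [3, 5, 7] with
# inverse [0, 1, 1, 2] over the *sorted* order. Indexing that inverse with
# torch.sort(i)[1] undoes the permutation, mapping the original positions
# [5, 3, 5, 7] -> [1, 0, 1, 2], i.e. indices into the uniques [3, 5, 7].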
+ y = ( + torch.sort( + t, + stable=True, ) + for t in current_seeds + ) + z = ((v, torch.sort(i)[1]) for v, i in y) - # Make a call to sample_batches for each call group - for i, current_seeds in enumerate(nodes_call_groups): - current_batches = torch.arange( - batch_id_start + i * batches_per_call, - batch_id_start + (i + 1) * batches_per_call, - device="cuda", - dtype=torch.int32, + u = [ + ( + torch.unique_consecutive( + t, + return_inverse=True, + ), + i, ) + for t, i in z + ] - current_batches = current_batches.repeat_interleave(batch_size)[ - : len(current_seeds) + if len(u) > 0: + current_seeds = torch.concat([a[0] for a, _ in u]) + current_inv = torch.concat([a[1][i] for a, i in u]) + current_batches = torch.concat( + [ + torch.full( + (a[0].numel(),), + i + batch_id_start + (call_id * batches_per_call), + device="cuda", + dtype=torch.int32, + ) + for i, (a, _) in enumerate(u) + ] + ) + else: + current_seeds = torch.tensor([], device="cuda", dtype=torch.int64) + current_inv = torch.tensor([], device="cuda", dtype=torch.int64) + current_batches = torch.tensor([], device="cuda", dtype=torch.int32) + del u + + # Join with the leftovers + leftover_seeds, lyi = torch.sort( + leftover_seeds.flatten(), + stable=True, + ) + lz = torch.sort(lyi)[1] + leftover_seeds, lui = leftover_seeds.unique_consecutive(return_inverse=True) + leftover_inv = lui[lz] + + current_seeds = torch.concat([current_seeds, leftover_seeds]) + current_inv = torch.concat([current_inv, leftover_inv]) + current_batches = torch.concat( + [ + current_batches, + torch.full( + (leftover_seeds.numel(),), + (current_batches[-1] + 1) if current_batches.numel() > 0 else 0, + device="cuda", + dtype=torch.int32, + ), ] + ) + del leftover_seeds + del lz + del lui + + minibatch_dict = self.sample_batches( + seeds=current_seeds, + batch_ids=current_batches, + random_state=random_state, + assume_equal_input_size=assume_equal_input_size, + ) + minibatch_dict["input_index"] = current_ix.cuda() + minibatch_dict["input_offsets"] = input_offsets + minibatch_dict[ + "edge_inverse" + ] = current_inv # (2 * batch_size) entries per batch + + if self.__writer is None: + # rename renumber_map -> map to match unbuffered format + minibatch_dict["map"] = minibatch_dict["renumber_map"] + del minibatch_dict["renumber_map"] + minibatch_dict = { + k: torch.as_tensor(v, device="cuda") + for k, v in minibatch_dict.items() + if v is not None + } + + return iter([(minibatch_dict, current_batches[0], current_batches[-1])]) + else: + self.__writer.write_minibatches(minibatch_dict) + return None - minibatch_dict = self.sample_batches( - seeds=current_seeds, - batch_ids=current_batches, - random_state=random_state, - assume_equal_input_size=input_size_is_equal, + def sample_from_edges( + self, + edges: TensorType, + *, + batch_size: int = 16, + random_state: int = 62, + assume_equal_input_size: bool = False, + input_id: Optional[TensorType] = None, + ) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]: + """ + Performs sampling starting from seed edges. + + Parameters + ---------- + edges: TensorType + 2 x (# edges) tensor of edges to sample from. + Standard src/dst format. This will be converted + to a list of seed nodes. + batch_size: int + The size of each batch. + random_state: int + The random seed to use for sampling. + assume_equal_input_size: bool + Whether this function should assume that inputs + are equal across ranks. Skips some potentially + slow steps if True. 
+ input_id: Optional[TensorType] + Input ids corresponding to the original batch tensor, if it + was permuted prior to calling this function. If present, + will be saved with the samples. + """ + + torch = import_optional("torch") + + edges = torch.as_tensor(edges, device="cuda") + num_seed_edges = edges.shape[-1] + + batches_per_call = self._local_seeds_per_call // batch_size + actual_seed_edges_per_call = batches_per_call * batch_size + + if input_id is None: + input_id = torch.arange(num_seed_edges, dtype=torch.int64, device="cpu") + + local_num_batches = int(ceil(num_seed_edges / batch_size)) + batch_id_start, input_size_is_equal = self.get_start_batch_offset( + local_num_batches, assume_equal_input_size=assume_equal_input_size + ) + + edges_call_groups, index_call_groups = self.__get_call_groups( + edges, + input_id, + actual_seed_edges_per_call, + assume_equal_input_size=input_size_is_equal, + ) + + sample_args = ( + batch_id_start, + batch_size, + batches_per_call, + random_state, + input_size_is_equal, + ) + + if self.__writer is None: + # Buffered sampling + return BufferedSampleReader( + zip(edges_call_groups, index_call_groups), + self.__sample_from_edges_func, + *sample_args, ) - self.__writer.write_minibatches(minibatch_dict) + else: + # Unbuffered sampling + for i, current_seeds_and_ix in enumerate( + zip(edges_call_groups, index_call_groups) + ): + self.__sample_from_edges_func( + i, + current_seeds_and_ix, + *sample_args, + ) + + # Return a reader that points to the stored samples + rank = torch.distributed.get_rank() if self.is_multi_gpu else None + return self.__writer.get_reader(rank) @property def is_multi_gpu(self): @@ -671,7 +676,7 @@ def _retain_original_seeds(self): return self.__retain_original_seeds -class UniformNeighborSampler(DistSampler): +class NeighborSampler(DistSampler): # Number of vertices in the output minibatch, based # on benchmarking. BASE_VERTICES_PER_BYTE = 0.1107662486009992 @@ -693,6 +698,7 @@ def __init__( compression: str = "COO", compress_per_hop: bool = False, with_replacement: bool = False, + biased: bool = False, ): self.__fanout = fanout self.__prior_sources_behavior = prior_sources_behavior @@ -701,6 +707,18 @@ self.__compression = compression self.__with_replacement = with_replacement + + # It is currently required that graphs are weighted for biased + # sampling. So setting the function here is safe. In the future, + # if libcugraph allows setting a new attribute, this API might + # change. + # TODO allow func to be a call to a future remote sampling API + # if the provided graph is in another process (rapidsai/cugraph#4623).
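# (Editor's note, illustrative only.) The assignment below is what lets one
# class front both pylibcugraph primitives: constructing the sampler with
# biased=True dispatches to pylibcugraph.biased_neighbor_sample, which draws
# neighbors with probability proportional to edge weight (hence the
# weighted-graph requirement stated above), while biased=False keeps the
# previous uniform behavior.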
+ self.__func = ( + pylibcugraph.biased_neighbor_sample + if biased + else pylibcugraph.uniform_neighbor_sample + ) + super().__init__( graph, writer, @@ -713,14 +731,12 @@ def __calc_local_seeds_per_call(self, local_seeds_per_call: Optional[int] = None if local_seeds_per_call is None: if len([x for x in self.__fanout if x <= 0]) > 0: - return UniformNeighborSampler.UNKNOWN_VERTICES_DEFAULT + return NeighborSampler.UNKNOWN_VERTICES_DEFAULT total_memory = torch.cuda.get_device_properties(0).total_memory fanout_prod = reduce(lambda x, y: x * y, self.__fanout) return int( - UniformNeighborSampler.BASE_VERTICES_PER_BYTE - * total_memory - / fanout_prod + NeighborSampler.BASE_VERTICES_PER_BYTE * total_memory / fanout_prod ) return local_seeds_per_call @@ -755,7 +771,7 @@ def sample_batches( else: label_offsets = None - sampling_results_dict = pylibcugraph.uniform_neighbor_sample( + sampling_results_dict = self.__func( self._resource_handle, self._graph, start_list=cupy.asarray(seeds), @@ -764,7 +780,7 @@ def sample_batches( label_to_output_comm_rank=cupy.asarray(label_to_output_comm_rank), h_fan_out=np.array(self.__fanout, dtype="int32"), with_replacement=self.__with_replacement, - do_expensive_check=True, + do_expensive_check=False, with_edge_properties=True, random_state=random_state + rank, prior_sources_behavior=self.__prior_sources_behavior, @@ -795,7 +811,7 @@ def sample_batches( else: label_offsets = None - sampling_results_dict = pylibcugraph.uniform_neighbor_sample( + sampling_results_dict = self.__func( self._resource_handle, self._graph, start_list=cupy.asarray(seeds), diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index d83f88c0c96..bc5cca67c2e 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -255,6 +255,13 @@ def __from_edgelist( elif elist[source].dtype not in [np.int32, np.int64] or elist[ destination ].dtype not in [np.int32, np.int64]: + if elist[destination].dtype in [np.uint32, np.uint64] or elist[ + source + ].dtype in [np.uint32, np.uint64]: + raise ValueError( + "Unsigned integers are not supported as vertex ids." 
+ " Either convert to signed integers or set renumber=True" + ) raise ValueError("set renumber to True for non integer columns ids") # The dataframe will be symmetrized iff the graph is undirected diff --git a/python/cugraph/cugraph/structure/hypergraph.py b/python/cugraph/cugraph/structure/hypergraph.py index add68cb6dac..bdc98333da0 100644 --- a/python/cugraph/cugraph/structure/hypergraph.py +++ b/python/cugraph/cugraph/structure/hypergraph.py @@ -440,6 +440,7 @@ def _create_hyper_edges( for key, col in events[columns].items(): cat = categories.get(key, key) fs = [EVENTID] + ([key] if drop_edge_attrs else edge_attrs) + fs = list(set(fs)) df = events[fs].dropna(subset=[key]) if dropna else events[fs] if len(df) == 0: continue @@ -464,8 +465,7 @@ def _create_hyper_edges( if not drop_edge_attrs: columns += edge_attrs - edges = cudf.concat(edges)[columns] - edges.reset_index(drop=True, inplace=True) + edges = cudf.concat(edges, ignore_index=True)[list(set(columns))] return edges @@ -546,6 +546,7 @@ def _create_direct_edges( for key2, col2 in events[sorted(edge_shape[key1])].items(): cat2 = categories.get(key2, key2) fs = [EVENTID] + ([key1, key2] if drop_edge_attrs else edge_attrs) + fs = list(set(fs)) df = events[fs].dropna(subset=[key1, key2]) if dropna else events[fs] if len(df) == 0: continue @@ -573,20 +574,22 @@ def _create_direct_edges( if not drop_edge_attrs: columns += edge_attrs - edges = cudf.concat(edges)[columns] + edges = cudf.concat(edges)[list(set(columns))] edges.reset_index(drop=True, inplace=True) return edges def _str_scalar_to_category(size, val): - return cudf.core.column.build_categorical_column( - categories=cudf.core.column.as_column([val], dtype="str"), - codes=cudf.core.column.as_column(0, length=size, dtype=np.int32), - mask=None, + return cudf.core.column.CategoricalColumn( + data=None, size=size, + dtype=cudf.CategoricalDtype( + categories=cudf.core.column.as_column([val], dtype="str"), ordered=False + ), + mask=None, offset=0, null_count=0, - ordered=False, + children=(cudf.core.column.as_column(0, length=size, dtype=np.int32),), ) diff --git a/python/cugraph/cugraph/testing/resultset.py b/python/cugraph/cugraph/testing/resultset.py index 9570d7f3e04..f557ad13089 100644 --- a/python/cugraph/cugraph/testing/resultset.py +++ b/python/cugraph/cugraph/testing/resultset.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import warnings import tarfile import urllib.request @@ -108,7 +109,11 @@ def load_resultset(resultset_name, resultset_download_url): if not compressed_file_path.exists(): urllib.request.urlretrieve(resultset_download_url, compressed_file_path) tar = tarfile.open(str(compressed_file_path), "r:gz") - tar.extractall(str(curr_resultset_download_dir)) + # TODO: pass filter="fully_trusted" when minimum supported Python version >=3.12 + # ref: https://docs.python.org/3/library/tarfile.html#tarfile-extraction-filter + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=DeprecationWarning) + tar.extractall(str(curr_resultset_download_dir)) tar.close() # FIXME: This assumes separator is " ", but should this be configurable? 
diff --git a/python/cugraph/cugraph/tests/data_store/test_property_graph.py b/python/cugraph/cugraph/tests/data_store/test_property_graph.py index da5608e0193..50f08cdf3d0 100644 --- a/python/cugraph/cugraph/tests/data_store/test_property_graph.py +++ b/python/cugraph/cugraph/tests/data_store/test_property_graph.py @@ -2576,9 +2576,10 @@ def bench_extract_subgraph_for_rmat(gpubenchmark, rmat_PropertyGraph): scn = PropertyGraph.src_col_name dcn = PropertyGraph.dst_col_name - verts = [] - for i in range(0, 10000, 10): - verts.append(generated_df["src"].iloc[i]) + # Build a query string to extract a graph with only specific edges based on + # the integer vertex IDs. Other edge and/or vertex properties can be + # included in the query as well. + verts = [int(generated_df["src"].iloc[i]) for i in range(0, 10000, 10)] selected_edges = pG.select_edges(f"{scn}.isin({verts}) | {dcn}.isin({verts})") gpubenchmark( @@ -2618,9 +2619,10 @@ def bench_extract_subgraph_for_rmat_detect_duplicate_edges( scn = PropertyGraph.src_col_name dcn = PropertyGraph.dst_col_name - verts = [] - for i in range(0, 10000, 10): - verts.append(generated_df["src"].iloc[i]) + # Build a query string to extract a graph with only specific edges based on + # the integer vertex IDs. Other edge and/or vertex properties can be + # included in the query as well. + verts = [int(generated_df["src"].iloc[i]) for i in range(0, 10000, 10)] selected_edges = pG.select_edges(f"{scn}.isin({verts}) | {dcn}.isin({verts})") diff --git a/python/cugraph/cugraph/tests/generators/test_rmat.py b/python/cugraph/cugraph/tests/generators/test_rmat.py index 1cee0461686..87cbe636fdc 100644 --- a/python/cugraph/cugraph/tests/generators/test_rmat.py +++ b/python/cugraph/cugraph/tests/generators/test_rmat.py @@ -27,7 +27,9 @@ _scale_values = [2, 4, 16] _scale_test_ids = [f"scale={x}" for x in _scale_values] _graph_types = [cugraph.Graph, None, int] -_graph_test_ids = [f"create_using={getattr(x,'__name__',str(x))}" for x in _graph_types] +_graph_test_ids = [ + f"create_using={getattr(x, '__name__', str(x))}" for x in _graph_types +] _clip_and_flip = [False, True] _clip_and_flip_test_ids = [f"clip_and_flip={x}" for x in _clip_and_flip] _scramble_vertex_ids = [False, True] diff --git a/python/cugraph/cugraph/tests/generators/test_rmat_mg.py b/python/cugraph/cugraph/tests/generators/test_rmat_mg.py index 0e1808d2f80..44a6b3a2fc1 100644 --- a/python/cugraph/cugraph/tests/generators/test_rmat_mg.py +++ b/python/cugraph/cugraph/tests/generators/test_rmat_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -34,7 +34,9 @@ _scale_values = [2, 4, 16] _scale_test_ids = [f"scale={x}" for x in _scale_values] _graph_types = [cugraph.Graph, None, int] -_graph_test_ids = [f"create_using={getattr(x,'__name__',str(x))}" for x in _graph_types] +_graph_test_ids = [ + f"create_using={getattr(x, '__name__', str(x))}" for x in _graph_types +] def _call_rmat(scale, num_edges, create_using, mg=True): diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index 65bcce78771..3c5d6428001 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -119,7 +119,7 @@ def test_bulk_sampler_remainder(scratch_dir): assert b in recovered_samples["batch_id"].values_host.tolist() for x in range(0, 6, 2): - subdir = f"{x}-{x+1}" + subdir = f"{x}-{x + 1}" df = cudf.read_parquet(os.path.join(samples_path, f"batch={subdir}.parquet")) assert ((df.batch_id == x) | (df.batch_id == (x + 1))).all() diff --git a/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py b/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py index 965f731d328..64db0232fb1 100644 --- a/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_dist_sampler.py @@ -20,6 +20,7 @@ from cugraph.datasets import karate from cugraph.gnn import UniformNeighborSampler, DistSampleWriter +from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays from pylibcugraph import SGGraph, ResourceHandle, GraphProperties @@ -41,7 +42,7 @@ @pytest.fixture -def karate_graph(): +def karate_graph() -> SGGraph: el = karate.get_edgelist().reset_index().rename(columns={"index": "eid"}) G = SGGraph( ResourceHandle(), @@ -78,11 +79,13 @@ def test_dist_sampler_simple( ) recovered_samples = cudf.read_parquet(samples_path) + print(recovered_samples) original_el = karate.get_edgelist() for b in range(len(seeds) // batch_size): el_start = recovered_samples.label_hop_offsets.iloc[b * len(fanout)] el_end = recovered_samples.label_hop_offsets.iloc[(b + 1) * len(fanout)] + print(el_start, el_end) src = recovered_samples.majors.iloc[el_start:el_end] dst = recovered_samples.minors.iloc[el_start:el_end] edge_id = recovered_samples.edge_id.iloc[el_start:el_end] @@ -99,3 +102,60 @@ def test_dist_sampler_simple( assert original_el.dst.iloc[edge_id.iloc[i]] == dst.iloc[i] shutil.rmtree(samples_path) + + +@pytest.mark.sg +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.parametrize("seeds_per_call", [4, 5, 10]) +@pytest.mark.parametrize("compression", ["COO", "CSR"]) +def test_dist_sampler_buffered_in_memory( + scratch_dir: str, karate_graph: SGGraph, seeds_per_call: int, compression: str +): + G = karate_graph + + samples_path = os.path.join(scratch_dir, "test_bulk_sampler_buffered_in_memory") + create_directory_with_overwrite(samples_path) + + seeds = cupy.arange(10, dtype="int64") + + unbuffered_sampler = UniformNeighborSampler( + G, + writer=DistSampleWriter(samples_path), + local_seeds_per_call=seeds_per_call, + compression=compression, + ) + + buffered_sampler = UniformNeighborSampler( + G, + writer=None, + local_seeds_per_call=seeds_per_call, + compression=compression, + ) + + unbuffered_results = unbuffered_sampler.sample_from_nodes( + seeds, + batch_size=4, + ) + + unbuffered_results = [ + (create_df_from_disjoint_arrays(r[0]), r[1], r[2]) for r in 
unbuffered_results + ] + + buffered_results = buffered_sampler.sample_from_nodes(seeds, batch_size=4) + buffered_results = [ + (create_df_from_disjoint_arrays(r[0]), r[1], r[2]) for r in buffered_results + ] + + assert len(buffered_results) == len(unbuffered_results) + + for k in range(len(buffered_results)): + br, bs, be = buffered_results[k] + ur, us, ue = unbuffered_results[k] + + assert bs == us + assert be == ue + + for col in ur.columns: + assert (br[col].dropna() == ur[col].dropna()).all() + + shutil.rmtree(samples_path) diff --git a/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py index a1c32938994..5bb541d6cf3 100644 --- a/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_dist_sampler_mg.py @@ -18,6 +18,8 @@ import cupy import cudf +from typing import Any + from cugraph.datasets import karate from cugraph.gnn import ( UniformNeighborSampler, @@ -27,6 +29,7 @@ cugraph_comms_init, cugraph_comms_shutdown, ) +from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays from pylibcugraph import MGGraph, ResourceHandle, GraphProperties from cugraph.utilities.utils import ( @@ -235,3 +238,80 @@ def test_dist_sampler_uneven(scratch_dir, batch_size, fanout, seeds_per_call): assert original_el.dst.iloc[edge_id.iloc[i]] == dst.iloc[i] shutil.rmtree(samples_path) + + +def run_test_dist_sampler_buffered_in_memory( + rank: int, + world_size: int, + uid: Any, + samples_path: str, + seeds_per_call: int, + compression: str, +): + init_pytorch(rank, world_size) + cugraph_comms_init(rank, world_size, uid, device=rank) + + G = karate_mg_graph(rank, world_size) + + num_seeds = 8 + seeds = cupy.random.randint(0, 34, num_seeds, dtype="int64") + + unbuffered_sampler = UniformNeighborSampler( + G, + writer=DistSampleWriter(samples_path), + local_seeds_per_call=seeds_per_call, + compression=compression, + ) + + buffered_sampler = UniformNeighborSampler( + G, + writer=None, + local_seeds_per_call=seeds_per_call, + compression=compression, + ) + + unbuffered_results = unbuffered_sampler.sample_from_nodes( + seeds, + batch_size=4, + ) + + unbuffered_results = [ + (create_df_from_disjoint_arrays(r[0]), r[1], r[2]) for r in unbuffered_results + ] + + buffered_results = buffered_sampler.sample_from_nodes(seeds, batch_size=4) + buffered_results = [ + (create_df_from_disjoint_arrays(r[0]), r[1], r[2]) for r in buffered_results + ] + + assert len(buffered_results) == len(unbuffered_results) + + for k in range(len(buffered_results)): + br, bs, be = buffered_results[k] + ur, us, ue = unbuffered_results[k] + + assert bs == us + assert be == ue + + for col in ur.columns: + assert (br[col].dropna() == ur[col].dropna()).all() + + +@pytest.mark.mg +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.parametrize("seeds_per_call", [4, 5, 10]) +@pytest.mark.parametrize("compression", ["COO", "CSR"]) +def test_dist_sampler_buffered_in_memory(scratch_dir, seeds_per_call, compression): + uid = cugraph_comms_create_unique_id() + + samples_path = os.path.join(scratch_dir, "test_bulk_sampler_buffered_in_memory_mg") + create_directory_with_overwrite(samples_path) + + world_size = torch.cuda.device_count() + torch.multiprocessing.spawn( + run_test_dist_sampler_buffered_in_memory, + args=(world_size, uid, samples_path, seeds_per_call, compression), + nprocs=world_size, + ) + + shutil.rmtree(samples_path) diff --git 
a/python/cugraph/cugraph/tests/structure/test_graph_mg.py b/python/cugraph/cugraph/tests/structure/test_graph_mg.py index cba61731e9a..f2cc1583f93 100644 --- a/python/cugraph/cugraph/tests/structure/test_graph_mg.py +++ b/python/cugraph/cugraph/tests/structure/test_graph_mg.py @@ -303,7 +303,7 @@ def test_mg_graph_serializable(dask_client, input_combo): G = input_combo["MGGraph"] dask_client.publish_dataset(shared_g=G) shared_g = dask_client.get_dataset("shared_g") - assert type(shared_g) == type(G) + assert type(shared_g) is type(G) assert G.number_of_vertices() == shared_g.number_of_vertices() assert G.number_of_edges() == shared_g.number_of_edges() # cleanup @@ -314,7 +314,7 @@ def test_mg_graph_serializable(dask_client, input_combo): def test_mg_graph_copy(): G = cugraph.MultiGraph(directed=True) G_c = copy.deepcopy(G) - assert type(G) == type(G_c) + assert type(G) is type(G_c) @pytest.mark.mg diff --git a/python/cugraph/cugraph/tests/structure/test_hypergraph.py b/python/cugraph/cugraph/tests/structure/test_hypergraph.py index 848f31b940f..f1dfc17a509 100644 --- a/python/cugraph/cugraph/tests/structure/test_hypergraph.py +++ b/python/cugraph/cugraph/tests/structure/test_hypergraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -171,7 +171,8 @@ def test_hyperedges(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_frame_equal(edges, h["edges"], check_dtype=False) + # check_like ignores the order of columns as long as all correct ones are present + assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True) for (k, v) in [("entities", 12), ("nodes", 15), ("edges", 12), ("events", 3)]: assert len(h[k]) == v @@ -266,7 +267,8 @@ def test_drop_edge_attrs(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_frame_equal(edges, h["edges"], check_dtype=False) + # check_like ignores the order of columns as long as all correct ones are present + assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True) for (k, v) in [("entities", 9), ("nodes", 12), ("edges", 9), ("events", 3)]: assert len(h[k]) == v @@ -308,7 +310,8 @@ def test_drop_edge_attrs_direct(categorical_metadata): if categorical_metadata: edges = edges.astype({"edge_type": "category"}) - assert_frame_equal(edges, h["edges"], check_dtype=False) + # check_like ignores the order of columns as long as all correct ones are present + assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True) for (k, v) in [("entities", 9), ("nodes", 9), ("edges", 6), ("events", 0)]: assert len(h[k]) == v diff --git a/python/cugraph/cugraph/tests/traversal/test_sssp.py b/python/cugraph/cugraph/tests/traversal/test_sssp.py index 58288e022e8..ceb6040275d 100644 --- a/python/cugraph/cugraph/tests/traversal/test_sssp.py +++ b/python/cugraph/cugraph/tests/traversal/test_sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -486,7 +486,7 @@ def test_scipy_api_compat(): distances = cugraph.shortest_path( input_coo_matrix, source=0, return_predecessors=False ) - assert type(distances) != tuple + assert type(distances) is not tuple with pytest.raises(ValueError): cugraph.shortest_path(input_coo_matrix, source=0, unweighted=False) diff --git a/python/cugraph/cugraph/traversal/sssp.py b/python/cugraph/cugraph/traversal/sssp.py index 5ab97e60390..bb98b5a9a29 100644 --- a/python/cugraph/cugraph/traversal/sssp.py +++ b/python/cugraph/cugraph/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,7 +36,7 @@ def _ensure_args( # checks common to all input types if (method is not None) and (method != "auto"): raise ValueError("only 'auto' is currently accepted for method") - if (indices is not None) and (type(indices) == list): + if (indices is not None) and (type(indices) is list): raise ValueError("indices currently cannot be a list-like type") if (indices is not None) and (source is not None): raise TypeError("cannot specify both 'source' and 'indices'") @@ -70,9 +70,11 @@ def _ensure_args( # Check for non-Graph-type inputs else: - if (directed is not None) and (type(directed) != bool): + if (directed is not None) and (type(directed) is not bool): raise ValueError("'directed' must be a bool") - if (return_predecessors is not None) and (type(return_predecessors) != bool): + if (return_predecessors is not None) and ( + type(return_predecessors) is not bool + ): raise ValueError("'return_predecessors' must be a bool") if (unweighted is not None) and (unweighted is not True): raise ValueError("'unweighted' currently must be True if " "specified") diff --git a/python/cugraph/cugraph/utilities/utils.py b/python/cugraph/cugraph/utilities/utils.py index 7a54a0bf2cf..69616f26857 100644 --- a/python/cugraph/cugraph/utilities/utils.py +++ b/python/cugraph/cugraph/utilities/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -523,6 +523,7 @@ def create_list_series_from_2d_ar(ar, index): mask_col = cp.full(shape=n_rows, fill_value=True) mask = cudf._lib.transform.bools_to_mask(as_column(mask_col)) lc = cudf.core.column.ListColumn( + data=None, size=n_rows, dtype=cudf.ListDtype(data.dtype), mask=mask, @@ -530,7 +531,7 @@ def create_list_series_from_2d_ar(ar, index): null_count=0, children=(offset_col, data), ) - return cudf.Series(lc, index=index) + return cudf.Series._from_column(lc, index=index) def create_directory_with_overwrite(directory): diff --git a/python/cugraph/pyproject.toml b/python/cugraph/pyproject.toml index bbb89b03697..8185a8d915d 100644 --- a/python/cugraph/pyproject.toml +++ b/python/cugraph/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "cython>=3.0.0", "rapids-build-backend>=0.3.1,<0.4.0.dev0", - "scikit-build-core[pyproject]>=0.7.0", + "scikit-build-core[pyproject]>=0.10.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
build-backend = "rapids_build_backend.build" @@ -21,35 +21,35 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ - "cudf==24.10.*,>=0.0.0a0", + "cudf==24.12.*,>=0.0.0a0", "cupy-cuda11x>=12.0.0", - "dask-cuda==24.10.*,>=0.0.0a0", - "dask-cudf==24.10.*,>=0.0.0a0", + "dask-cuda==24.12.*,>=0.0.0a0", + "dask-cudf==24.12.*,>=0.0.0a0", "fsspec[http]>=0.6.0", "numba>=0.57", - "numpy>=1.23,<2.0a0", - "pylibcugraph==24.10.*,>=0.0.0a0", - "raft-dask==24.10.*,>=0.0.0a0", - "rapids-dask-dependency==24.10.*,>=0.0.0a0", - "rmm==24.10.*,>=0.0.0a0", - "ucx-py==0.40.*,>=0.0.0a0", + "numpy>=1.23,<3.0a0", + "pylibcugraph==24.12.*,>=0.0.0a0", + "raft-dask==24.12.*,>=0.0.0a0", + "rapids-dask-dependency==24.12.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", + "ucx-py==0.41.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", "Programming Language :: Python", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] [project.optional-dependencies] test = [ "networkx>=2.5.1", - "numpy>=1.23,<2.0a0", + "numpy>=1.23,<3.0a0", "pandas", - "pylibwholegraph==24.10.*,>=0.0.0a0", + "pylibwholegraph==24.12.*,>=0.0.0a0", "pytest", "pytest-benchmark", "pytest-cov", @@ -66,7 +66,8 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/" [tool.scikit-build] build-dir = "build/{wheel_tag}" cmake.build-type = "Release" -cmake.minimum-version = "3.26.4" +cmake.version = "CMakeLists.txt" +minimum-version = "build-system.requires" ninja.make-fallback = true sdist.reproducible = true wheel.packages = ["cugraph"] @@ -81,9 +82,9 @@ build-backend = "scikit_build_core.build" requires = [ "cmake>=3.26.4,!=3.30.0", "ninja", - "pylibcugraph==24.10.*,>=0.0.0a0", - "pylibraft==24.10.*,>=0.0.0a0", - "rmm==24.10.*,>=0.0.0a0", + "pylibcugraph==24.12.*,>=0.0.0a0", + "pylibraft==24.12.*,>=0.0.0a0", + "rmm==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true" diff --git a/python/cugraph/pytest.ini b/python/cugraph/pytest.ini index 675a6cf8fde..bca148538d9 100644 --- a/python/cugraph/pytest.ini +++ b/python/cugraph/pytest.ini @@ -17,6 +17,7 @@ addopts = --benchmark-max-time=0 --benchmark-min-rounds=1 --benchmark-columns="mean, rounds" + --tb=native ## do not run the slow tests/benchmarks by default -m "not slow" ## for use with rapids-pytest-benchmark plugin diff --git a/python/nx-cugraph/README.md b/python/nx-cugraph/README.md index 458421e2b6e..c3ca0b880a9 100644 --- a/python/nx-cugraph/README.md +++ b/python/nx-cugraph/README.md @@ -8,8 +8,8 @@ to run supported algorithms with GPU acceleration. 
nx-cugraph requires the following: * NVIDIA GPU, Volta architecture or later, with [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0+ - * CUDA 11.2, 11.4, 11.5, 11.8, or 12.0 - * Python version 3.9, 3.10, or 3.11 + * CUDA 11.2, 11.4, 11.5, 11.8, 12.0, 12.2, or 12.5 + * Python version 3.10, 3.11, or 3.12 * NetworkX >= version 3.0 (version 3.2 or higher recommended) More details about system requirements can be found in the [RAPIDS System Requirements documentation](https://docs.rapids.ai/install#system-req). diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index f58a6e2293b..a5e45979fe2 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -22,6 +22,7 @@ $ python _nx_cugraph/__init__.py """ +import os from _nx_cugraph._version import __version__ @@ -35,7 +36,7 @@ "backend_name": "cugraph", "project": "nx-cugraph", "package": "nx_cugraph", - "url": f"https://github.com/rapidsai/cugraph/tree/branch-{_version_major:0>2}.{_version_minor:0>2}/python/nx-cugraph", + "url": f"https://rapids.ai/nx-cugraph", "short_summary": "GPU-accelerated backend.", # "description": "TODO", "functions": { @@ -293,10 +294,19 @@ def get_info(): for key in info_keys: del d[key] + + d["default_config"] = { + "use_compat_graphs": os.environ.get("NX_CUGRAPH_USE_COMPAT_GRAPHS", "true") + .strip() + .lower() + == "true", + } return d -def _check_networkx_version(): +def _check_networkx_version() -> tuple[int, int]: + """Check the version of networkx and return ``(major, minor)`` version tuple.""" + import re import warnings import networkx as nx @@ -310,12 +320,20 @@ def _check_networkx_version(): UserWarning, stacklevel=2, ) - if len(version_minor) > 1: + + # Allow single-digit minor versions, e.g. 3.4 and release candidates, e.g. 3.4rc0 + pattern = r"^\d(rc\d+)?$" + + if not re.match(pattern, version_minor): raise RuntimeWarning( f"nx-cugraph version {__version__} does not work with networkx version " f"{nx.__version__}. Please upgrade (or fix) your Python environment." ) + nxver_major = int(version_major) + nxver_minor = int(re.match(r"^\d+", version_minor).group()) + return (nxver_major, nxver_minor) + if __name__ == "__main__": from pathlib import Path diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index ce46360e234..dab2ea70ef1 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -26,7 +26,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.18 + rev: v0.19 hooks: - id: validate-pyproject name: Validate pyproject.toml @@ -40,29 +40,29 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.16.0 + rev: v3.17.0 hooks: - id: pyupgrade - args: [--py39-plus] + args: [--py310-plus] - repo: https://github.com/psf/black - rev: 24.4.2 + rev: 24.8.0 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.4 + rev: v0.6.7 hooks: - id: ruff args: [--fix-only, --show-fixes] # --unsafe-fixes] - repo: https://github.com/PyCQA/flake8 - rev: 7.1.0 + rev: 7.1.1 hooks: - id: flake8 args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=B020,SIM105'] # Why is this necessary? 
additional_dependencies: &flake8_dependencies # These versions need updated manually - - flake8==7.1.0 - - flake8-bugbear==24.4.26 + - flake8==7.1.1 + - flake8-bugbear==24.8.19 - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.4 + rev: v0.6.7 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/__init__.py b/python/nx-cugraph/nx_cugraph/__init__.py index 542256fa781..4404e57f645 100644 --- a/python/nx-cugraph/nx_cugraph/__init__.py +++ b/python/nx-cugraph/nx_cugraph/__init__.py @@ -12,6 +12,11 @@ # limitations under the License. from networkx.exception import * +from _nx_cugraph._version import __git_commit__, __version__ +from _nx_cugraph import _check_networkx_version + +_nxver: tuple[int, int] = _check_networkx_version() + from . import utils from . import classes @@ -32,7 +37,10 @@ from . import algorithms from .algorithms import * -from _nx_cugraph._version import __git_commit__, __version__ -from _nx_cugraph import _check_networkx_version +from .interface import BackendInterface -_check_networkx_version() +BackendInterface.Graph = classes.Graph +BackendInterface.DiGraph = classes.DiGraph +BackendInterface.MultiGraph = classes.MultiGraph +BackendInterface.MultiDiGraph = classes.MultiDiGraph +del BackendInterface diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py index 60276b7d41b..214970235c6 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py @@ -16,6 +16,7 @@ import networkx as nx import numpy as np +from nx_cugraph import _nxver from nx_cugraph.generators._utils import _create_using_class, _number_and_nodes from nx_cugraph.utils import index_dtype, networkx_algorithm @@ -48,7 +49,7 @@ def complete_bipartite_graph(n1, n2, create_using=None): nodes.extend(range(n2) if nodes2 is None else nodes2) if len(set(nodes)) != len(nodes): raise nx.NetworkXError("Inputs n1 and n2 must contain distinct nodes") - if nx.__version__[:3] <= "3.3": + if _nxver <= (3, 3): name = f"complete_bipartite_graph({orig_n1}, {orig_n2})" else: name = f"complete_bipartite_graph({n1}, {n2})" diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index ea1318060e0..52c512c454d 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -12,9 +12,9 @@ # limitations under the License. import warnings -import networkx as nx import pylibcugraph as plc +from nx_cugraph import _nxver from nx_cugraph.convert import _to_undirected_graph from nx_cugraph.utils import ( _dtype_param, @@ -27,7 +27,7 @@ __all__ = ["louvain_communities"] # max_level argument was added to NetworkX 3.3 -if nx.__version__[:3] <= "3.2": +if _nxver <= (3, 2): _max_level_param = { "max_level : int, optional": ( "Upper limit of the number of macro-iterations (max: 500)." 
@@ -81,7 +81,7 @@ def _louvain_communities( node_ids, clusters, modularity = plc.louvain( resource_handle=plc.ResourceHandle(), graph=G._get_plc_graph(weight, 1, dtype), - max_level=max_level, # TODO: add this parameter to NetworkX + max_level=max_level, threshold=threshold, resolution=resolution, do_expensive_check=False, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 8eb9a9946e7..e69ee88a17c 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -15,6 +15,7 @@ import pylibcugraph as plc import nx_cugraph as nxcg +from nx_cugraph import _nxver from nx_cugraph.convert import _to_undirected_graph from nx_cugraph.utils import ( _get_int_dtype, @@ -58,9 +59,12 @@ def _(G): @networkx_algorithm(is_incomplete=True, version_added="23.12", _plc="k_truss_subgraph") def k_truss(G, k): if is_nx := isinstance(G, nx.Graph): + is_compat_graph = isinstance(G, nxcg.Graph) G = nxcg.from_networkx(G, preserve_all_attrs=True) + else: + is_compat_graph = False if nxcg.number_of_selfloops(G) > 0: - if nx.__version__[:3] <= "3.2": + if _nxver <= (3, 2): exc_class = nx.NetworkXError else: exc_class = nx.NetworkXNotImplemented @@ -128,6 +132,7 @@ def k_truss(G, k): node_values, node_masks, key_to_id=key_to_id, + use_compat_graph=is_compat_graph, ) new_graph.graph.update(G.graph) return new_graph diff --git a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py index e529b83ab1a..cc59fd5eb64 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py @@ -15,6 +15,7 @@ import numpy as np import pylibcugraph as plc +from nx_cugraph import _nxver from nx_cugraph.convert import _to_graph from nx_cugraph.utils import ( _dtype_param, @@ -53,7 +54,7 @@ def hits( if nstart is not None: nstart = G._dict_to_nodearray(nstart, 0, dtype) if max_iter <= 0: - if nx.__version__[:3] <= "3.2": + if _nxver <= (3, 2): raise ValueError("`maxiter` must be a positive integer.") raise nx.PowerIterationFailedConvergence(max_iter) try: diff --git a/python/nx-cugraph/nx_cugraph/algorithms/operators/unary.py b/python/nx-cugraph/nx_cugraph/algorithms/operators/unary.py index f53b3458949..75dc5fbc706 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/operators/unary.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/operators/unary.py @@ -23,6 +23,7 @@ @networkx_algorithm(version_added="24.02") def complement(G): + is_compat_graph = isinstance(G, nxcg.Graph) G = _to_graph(G) N = G._N # Upcast to int64 so indices don't overflow. 
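A side note on the version-check rewrite threaded through these nx-cugraph hunks: replacing `nx.__version__[:3] <= "3.3"` with `_nxver <= (3, 3)` matters because slicing and comparing version strings is lexicographic and breaks at two-digit components. A quick self-contained illustration (editor's example, not part of the diff):

```python
# String slicing/comparison misbehaves once a version component hits two digits:
assert "3.10"[:3] == "3.1"     # the slice silently truncates "3.10"
assert not ("3.4" <= "3.10")   # lexicographically, "3.10" sorts before "3.4"

# Tuple comparison is numeric per component, so it stays correct:
assert (3, 4) <= (3, 10)
assert (3, 2) < (3, 3) < (3, 4) < (3, 10)
```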
@@ -43,6 +44,7 @@ def complement(G): src_indices.astype(index_dtype), dst_indices.astype(index_dtype), key_to_id=G.key_to_id, + use_compat_graph=is_compat_graph, ) @@ -51,10 +53,16 @@ def reverse(G, copy=True): if not G.is_directed(): raise nx.NetworkXError("Cannot reverse an undirected graph.") if isinstance(G, nx.Graph): - if not copy: + is_compat_graph = isinstance(G, nxcg.Graph) + if not copy and not is_compat_graph: raise RuntimeError( "Using `copy=False` is invalid when using a NetworkX graph " "as input to `nx_cugraph.reverse`" ) G = nxcg.from_networkx(G, preserve_all_attrs=True) - return G.reverse(copy=copy) + else: + is_compat_graph = False + rv = G.reverse(copy=copy) + if is_compat_graph: + return rv._to_compat_graph() + return rv diff --git a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/generic.py b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/generic.py index 7d6d77f34a4..ab3c7214303 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/generic.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/generic.py @@ -14,6 +14,7 @@ import numpy as np import nx_cugraph as nxcg +from nx_cugraph import _nxver from nx_cugraph.convert import _to_graph from nx_cugraph.utils import _dtype_param, _get_float_dtype, networkx_algorithm @@ -57,7 +58,7 @@ def shortest_path( paths = nxcg.all_pairs_dijkstra_path(G, weight=weight, dtype=dtype) else: # method == 'bellman-ford': paths = nxcg.all_pairs_bellman_ford_path(G, weight=weight, dtype=dtype) - if nx.__version__[:3] <= "3.4": + if _nxver <= (3, 4): paths = dict(paths) # To target elif method == "unweighted": @@ -129,7 +130,7 @@ def shortest_path_length( # To target elif method == "unweighted": lengths = nxcg.single_target_shortest_path_length(G, target) - if nx.__version__[:3] <= "3.4": + if _nxver <= (3, 4): lengths = dict(lengths) elif method == "dijkstra": lengths = nxcg.single_source_dijkstra_path_length( diff --git a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py index 0e98c366e4a..e9c515632ca 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py @@ -17,6 +17,7 @@ import numpy as np import pylibcugraph as plc +from nx_cugraph import _nxver from nx_cugraph.convert import _to_graph from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm @@ -43,7 +44,7 @@ def single_source_shortest_path_length(G, source, cutoff=None): def single_target_shortest_path_length(G, target, cutoff=None): G = _to_graph(G) rv = _bfs(G, target, cutoff, "Target", return_type="length") - if nx.__version__[:3] <= "3.4": + if _nxver <= (3, 4): return iter(rv.items()) return rv @@ -61,7 +62,7 @@ def bidirectional_shortest_path(G, source, target): # TODO PERF: do bidirectional traversal in core G = _to_graph(G) if source not in G or target not in G: - if nx.__version__[:3] <= "3.3": + if _nxver <= (3, 3): raise nx.NodeNotFound( f"Either source {source} or target {target} is not in G" ) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index 5e4466d7d33..72d0079cf0c 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -18,6 +18,7 @@ import pylibcugraph as plc import nx_cugraph as nxcg +from 
nx_cugraph import _nxver from nx_cugraph.convert import _to_graph from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm @@ -57,7 +58,7 @@ def _bfs(G, source, *, depth_limit=None, reverse=False): return distances[mask], predecessors[mask], node_ids[mask] -if nx.__version__[:3] <= "3.3": +if _nxver <= (3, 3): @networkx_algorithm(is_incomplete=True, version_added="24.02", _plc="bfs") def generic_bfs_edges( @@ -132,13 +133,15 @@ def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): raise NotImplementedError( "sort_neighbors argument in bfs_tree is not currently supported" ) + is_compat_graph = isinstance(G, nxcg.Graph) G = _check_G_and_source(G, source) if depth_limit is not None and depth_limit < 1: - return nxcg.DiGraph.from_coo( + return nxcg.CudaDiGraph.from_coo( 1, cp.array([], dtype=index_dtype), cp.array([], dtype=index_dtype), id_to_key=[source], + use_compat_graph=is_compat_graph, ) distances, predecessors, node_ids = _bfs( @@ -148,11 +151,12 @@ def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): reverse=reverse, ) if predecessors.size == 0: - return nxcg.DiGraph.from_coo( + return nxcg.CudaDiGraph.from_coo( 1, cp.array([], dtype=index_dtype), cp.array([], dtype=index_dtype), id_to_key=[source], + use_compat_graph=is_compat_graph, ) # TODO: create renumbering helper function(s) unique_node_ids = cp.unique(cp.hstack((predecessors, node_ids))) @@ -170,11 +174,12 @@ def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): old_index: new_index for new_index, old_index in enumerate(unique_node_ids.tolist()) } - return nxcg.DiGraph.from_coo( + return nxcg.CudaDiGraph.from_coo( unique_node_ids.size, src_indices, dst_indices, key_to_id=key_to_id, + use_compat_graph=is_compat_graph, ) diff --git a/python/nx-cugraph/nx_cugraph/classes/__init__.py b/python/nx-cugraph/nx_cugraph/classes/__init__.py index 19a5357da55..71168e5364f 100644 --- a/python/nx-cugraph/nx_cugraph/classes/__init__.py +++ b/python/nx-cugraph/nx_cugraph/classes/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,9 +10,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .graph import Graph -from .digraph import DiGraph -from .multigraph import MultiGraph -from .multidigraph import MultiDiGraph +from .graph import CudaGraph, Graph +from .digraph import CudaDiGraph, DiGraph +from .multigraph import CudaMultiGraph, MultiGraph +from .multidigraph import CudaMultiDiGraph, MultiDiGraph from .function import * diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index e5cfb8f6815..178bf44f16e 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -18,34 +18,108 @@ import cupy as cp import networkx as nx import numpy as np +from networkx.classes.digraph import ( + _CachedPropertyResetterAdjAndSucc, + _CachedPropertyResetterPred, +) import nx_cugraph as nxcg from ..utils import index_dtype -from .graph import Graph +from .graph import CudaGraph, Graph if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import AttrKey -__all__ = ["DiGraph"] +__all__ = ["CudaDiGraph", "DiGraph"] networkx_api = nxcg.utils.decorators.networkx_class(nx.DiGraph) -class DiGraph(Graph): - ################# - # Class methods # - ################# +class DiGraph(nx.DiGraph, Graph): + _nx_attrs = ("_node", "_adj", "_succ", "_pred") + + name = Graph.name + _node = Graph._node + + @property + @networkx_api + def _adj(self): + if (adj := self.__dict__["_adj"]) is None: + self._reify_networkx() + adj = self.__dict__["_adj"] + return adj + + @_adj.setter + def _adj(self, val): + self._prepare_setter() + _CachedPropertyResetterAdjAndSucc.__set__(None, self, val) + if cache := getattr(self, "__networkx_cache__", None): + cache.clear() + + @property + @networkx_api + def _succ(self): + if (succ := self.__dict__["_succ"]) is None: + self._reify_networkx() + succ = self.__dict__["_succ"] + return succ + + @_succ.setter + def _succ(self, val): + self._prepare_setter() + _CachedPropertyResetterAdjAndSucc.__set__(None, self, val) + if cache := getattr(self, "__networkx_cache__", None): + cache.clear() + + @property + @networkx_api + def _pred(self): + if (pred := self.__dict__["_pred"]) is None: + self._reify_networkx() + pred = self.__dict__["_pred"] + return pred + + @_pred.setter + def _pred(self, val): + self._prepare_setter() + _CachedPropertyResetterPred.__set__(None, self, val) + if cache := getattr(self, "__networkx_cache__", None): + cache.clear() @classmethod @networkx_api def is_directed(cls) -> bool: return True + @classmethod + @networkx_api + def is_multigraph(cls) -> bool: + return False + + @classmethod + def to_cudagraph_class(cls) -> type[CudaDiGraph]: + return CudaDiGraph + @classmethod def to_networkx_class(cls) -> type[nx.DiGraph]: return nx.DiGraph + +class CudaDiGraph(CudaGraph): + ################# + # Class methods # + ################# + + is_directed = classmethod(DiGraph.is_directed.__func__) + is_multigraph = classmethod(DiGraph.is_multigraph.__func__) + to_cudagraph_class = classmethod(DiGraph.to_cudagraph_class.__func__) + to_networkx_class = classmethod(DiGraph.to_networkx_class.__func__) + + @classmethod + def _to_compat_graph_class(cls) -> type[DiGraph]: + return DiGraph + @networkx_api def size(self, weight: AttrKey | None = None) -> int: if weight is not None: @@ -57,7 +131,7 @@ def size(self, weight: AttrKey | None = None) -> int: ########################## @networkx_api - def reverse(self, copy: bool = True) -> DiGraph: + def reverse(self, copy: bool = True) -> CudaDiGraph: return self._copy(not copy, self.__class__, reverse=True) 
@networkx_api @@ -162,6 +236,7 @@ def to_undirected(self, reciprocal=False, as_view=False): node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=False, ) if as_view: rv.graph = self.graph diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 7425eacb2b4..cfe1e1c87e9 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -20,8 +20,13 @@ import networkx as nx import numpy as np import pylibcugraph as plc +from networkx.classes.graph import ( + _CachedPropertyResetterAdj, + _CachedPropertyResetterNode, +) import nx_cugraph as nxcg +from nx_cugraph import _nxver from ..utils import index_dtype @@ -40,57 +45,246 @@ any_ndarray, ) -__all__ = ["Graph"] +__all__ = ["CudaGraph", "Graph"] networkx_api = nxcg.utils.decorators.networkx_class(nx.Graph) +# The "everything" cache key is an internal implementation detail of NetworkX +# that may change between releases. +if _nxver < (3, 4): + _CACHE_KEY = ( + True, # Include all edge values + True, # Include all node values + True, # Include `.graph` attributes + ) +else: + _CACHE_KEY = ( + True, # Include all edge values + True, # Include all node values + # `.graph` attributes are always included now + ) + +# Used to indicate when a full conversion to GPU failed so we don't try again. +_CANT_CONVERT_TO_GPU = "_CANT_CONVERT_TO_GPU" + + +# `collections.UserDict` was the preferred way to subclass dict, but now +# subclassing dict directly is much better supported and should work here. +# This class should only be necessary if the user clears the cache manually. +class _GraphCache(dict): + """Cache that ensures Graph will reify into a NetworkX graph when cleared.""" + + _graph: Graph -class Graph: + def __init__(self, graph: Graph): + self._graph = graph + + def clear(self) -> None: + self._graph._reify_networkx() + super().clear() + + +class Graph(nx.Graph): # Tell networkx to dispatch calls with this object to nx-cugraph __networkx_backend__: ClassVar[str] = "cugraph" # nx >=3.2 __networkx_plugin__: ClassVar[str] = "cugraph" # nx <3.2 + # Core attributes of NetworkX graphs that will be copied and cleared as appropriate. + # These attributes comprise the edge and node data model for NetworkX graphs. + _nx_attrs = ("_node", "_adj") + # Allow networkx dispatch machinery to cache conversions. # This means we should clear the cache if we ever mutate the object! - __networkx_cache__: dict | None + __networkx_cache__: _GraphCache | None # networkx properties graph: dict - graph_attr_dict_factory: ClassVar[type] = dict + # Should we declare type annotations for the rest? + + # Properties that trigger copying to the CPU + def _prepare_setter(self): + """Be careful when setting private attributes which may be used during init.""" + if ( + # If not present, then this must be in init + any(attr not in self.__dict__ for attr in self._nx_attrs) + # Already on the CPU + or not any(self.__dict__[attr] is None for attr in self._nx_attrs) + ): + return + if self._is_on_gpu: + # Copy from GPU to CPU + self._reify_networkx() + return + # Default values + for attr in self._nx_attrs: + if self.__dict__[attr] is None: + if attr == "_succ": + self.__dict__[attr] = self.__dict__["_adj"] + else: + self.__dict__[attr] = {}
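A short, hedged sketch of the contract `_GraphCache` enforces: clearing the dispatch cache reifies the host-side dicts first, so a graph whose data lives only on device is not silently lost.

```python
import nx_cugraph as nxcg

G = nxcg.Graph()
G.add_edge(0, 1)              # ordinary NetworkX mutation on the host side
# clear() calls G._reify_networkx() before dropping cached conversions.
G.__networkx_cache__.clear()
assert G._is_on_cpu           # the host dicts are (still) present
```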
- src_indices: cp.ndarray[IndexValue] - dst_indices: cp.ndarray[IndexValue] - edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] - edge_masks: dict[AttrKey, cp.ndarray[bool]] - node_values: dict[AttrKey, any_ndarray[NodeValue]] - node_masks: dict[AttrKey, any_ndarray[bool]] - key_to_id: dict[NodeKey, IndexValue] | None - _id_to_key: list[NodeKey] | None - _N: int - _node_ids: cp.ndarray[IndexValue] | None # holds plc.SGGraph.vertices_array data + @property + @networkx_api + def _node(self): + if (node := self.__dict__["_node"]) is None: + self._reify_networkx() + node = self.__dict__["_node"] + return node + + @_node.setter + def _node(self, val): + self._prepare_setter() + _CachedPropertyResetterNode.__set__(None, self, val) + if cache := getattr(self, "__networkx_cache__", None): + cache.clear() - # Used by graph._get_plc_graph - _plc_type_map: ClassVar[dict[np.dtype, np.dtype]] = { - # signed int - np.dtype(np.int8): np.dtype(np.float32), - np.dtype(np.int16): np.dtype(np.float32), - np.dtype(np.int32): np.dtype(np.float64), - np.dtype(np.int64): np.dtype(np.float64), # raise if abs(x) > 2**53 - # unsigned int - np.dtype(np.uint8): np.dtype(np.float32), - np.dtype(np.uint16): np.dtype(np.float32), - np.dtype(np.uint32): np.dtype(np.float64), - np.dtype(np.uint64): np.dtype(np.float64), # raise if x > 2**53 - # other - np.dtype(np.bool_): np.dtype(np.float32), - np.dtype(np.float16): np.dtype(np.float32), - } - _plc_allowed_edge_types: ClassVar[set[np.dtype]] = { - np.dtype(np.float32), - np.dtype(np.float64), - } + @property + @networkx_api + def _adj(self): + if (adj := self.__dict__["_adj"]) is None: + self._reify_networkx() + adj = self.__dict__["_adj"] + return adj + + @_adj.setter + def _adj(self, val): + self._prepare_setter() + _CachedPropertyResetterAdj.__set__(None, self, val) + if cache := getattr(self, "__networkx_cache__", None): + cache.clear() + + @property + def _is_on_gpu(self) -> bool: + """Whether the full graph is on device (in the cache). + + This returns False when only a subset of the graph (such as only + edge indices and edge attribute) is on device. + + The graph may be on host (CPU) and device (GPU) at the same time. + """ + cache = getattr(self, "__networkx_cache__", None) + if not cache: + return False + return _CACHE_KEY in cache.get("backends", {}).get("cugraph", {}) + + @property + def _is_on_cpu(self) -> bool: + """Whether the graph is on host as a NetworkX graph. + + This means the core data structures that comprise a NetworkX graph + (such as ``G._node`` and ``G._adj``) are present. + + The graph may be on host (CPU) and device (GPU) at the same time. + """ + return self.__dict__["_node"] is not None + + @property + def _cudagraph(self): + """Return the full ``CudaGraph`` on device, computing if necessary, or None.""" + nx_cache = getattr(self, "__networkx_cache__", None) + if nx_cache is None: + nx_cache = {} + elif _CANT_CONVERT_TO_GPU in nx_cache: + return None + cache = nx_cache.setdefault("backends", {}).setdefault("cugraph", {}) + if (Gcg := cache.get(_CACHE_KEY)) is not None: + if isinstance(Gcg, Graph): + # This shouldn't happen during normal use, but be extra-careful anyway + return Gcg._cudagraph + return Gcg + if self.__dict__["_node"] is None: + raise RuntimeError( + f"{type(self).__name__} cannot be converted to the GPU, because it is " + "not on the CPU! This is not supposed to be possible. 
If you believe " + "you have found a bug, please report a minimum reproducible example to " + "https://github.com/rapidsai/cugraph/issues/new/choose" + ) + try: + Gcg = nxcg.from_networkx( + self, preserve_edge_attrs=True, preserve_node_attrs=True + ) + except Exception: + # Should we warn that the full graph can't be on GPU? + nx_cache[_CANT_CONVERT_TO_GPU] = True + return None + Gcg.graph = self.graph + cache[_CACHE_KEY] = Gcg + return Gcg + + @_cudagraph.setter + def _cudagraph(self, val, *, clear_cpu=True): + """Set the full ``CudaGraph`` for this graph, or remove from device if None.""" + if (cache := getattr(self, "__networkx_cache__", None)) is None: + # Should we warn? + return + # TODO: pay close attention to when we should clear the cache, since + # this may or may not be a mutation. + cache = cache.setdefault("backends", {}).setdefault("cugraph", {}) + if val is None: + cache.pop(_CACHE_KEY, None) + else: + self.graph = val.graph + cache[_CACHE_KEY] = val + if clear_cpu: + for key in self._nx_attrs: + self.__dict__[key] = None + + @nx.Graph.name.setter + def name(self, s): + # Don't clear the cache when setting the name, since `.graph` is shared. + # There is a very small risk here for the cache to become (slightly) + # inconsistent if graphs from other backends are cached. + self.graph["name"] = s + + @classmethod + @networkx_api + def is_directed(cls) -> bool: + return False + + @classmethod + @networkx_api + def is_multigraph(cls) -> bool: + return False + + @classmethod + def to_cudagraph_class(cls) -> type[CudaGraph]: + return CudaGraph + + @classmethod + @networkx_api + def to_directed_class(cls) -> type[nxcg.DiGraph]: + return nxcg.DiGraph + + @classmethod + def to_networkx_class(cls) -> type[nx.Graph]: + return nx.Graph + + @classmethod + @networkx_api + def to_undirected_class(cls) -> type[Graph]: + return Graph + + def __init__(self, incoming_graph_data=None, **attr): + super().__init__(incoming_graph_data, **attr) + self.__networkx_cache__ = _GraphCache(self) + + def _reify_networkx(self) -> None: + """Copy graph to host (CPU) if necessary.""" + if self.__dict__["_node"] is None: + # After we make this into an nx graph, we rely on the cache being correct + Gcg = self._cudagraph + G = nxcg.to_networkx(Gcg) + for key in self._nx_attrs: + self.__dict__[key] = G.__dict__[key] + + def _become(self, other: Graph): + if self.__class__ is not other.__class__: + raise TypeError( + "Attempting to update graph inplace with graph of different type!" + ) + # Begin with the simplest implementation; do we need to do more?
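Before the remainder of `_become` below, a hedged sketch of the CPU/GPU life cycle that `_cudagraph`, `_is_on_gpu`, and `_reify_networkx` imply (it exercises only the code above):

```python
import nx_cugraph as nxcg

G = nxcg.Graph([(0, 1), (1, 2)])   # starts life as a host-side graph
assert G._is_on_cpu and not G._is_on_gpu
Gcg = G._cudagraph                 # converts once and caches on device
assert Gcg is not None and G._is_on_gpu
assert G._cudagraph is Gcg         # a second access hits the cache
G._reify_networkx()                # no-op: the host dicts are still present
```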
+ self.__dict__.update(other.__dict__) + return self #################### # Creation methods # @@ -109,9 +303,10 @@ def from_coo( *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> Graph: - new_graph = object.__new__(cls) + ) -> Graph | CudaGraph: + new_graph = object.__new__(cls.to_cudagraph_class()) new_graph.__networkx_cache__ = {} new_graph.src_indices = src_indices new_graph.dst_indices = dst_indices @@ -173,7 +368,8 @@ def from_coo( isolates = nxcg.algorithms.isolate._isolates(new_graph) if len(isolates) > 0: new_graph._node_ids = cp.arange(new_graph._N, dtype=index_dtype) - + if use_compat_graph or use_compat_graph is None and issubclass(cls, Graph): + new_graph = new_graph._to_compat_graph() return new_graph @classmethod @@ -188,8 +384,9 @@ def from_csr( *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> Graph: + ) -> Graph | CudaGraph: N = indptr.size - 1 src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead @@ -205,6 +402,7 @@ def from_csr( node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=use_compat_graph, **attr, ) @@ -220,8 +418,9 @@ def from_csc( *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> Graph: + ) -> Graph | CudaGraph: N = indptr.size - 1 dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead @@ -237,6 +436,7 @@ def from_csc( node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=use_compat_graph, **attr, ) @@ -254,8 +454,9 @@ def from_dcsr( *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> Graph: + ) -> Graph | CudaGraph: src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_srcs.get(), cp.diff(indptr).get()) @@ -270,6 +471,7 @@ def from_dcsr( node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=use_compat_graph, **attr, ) @@ -287,8 +489,9 @@ def from_dcsc( *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> Graph: + ) -> Graph | CudaGraph: dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_dsts.get(), cp.diff(indptr).get()) @@ -303,13 +506,75 @@ def from_dcsc( node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=use_compat_graph, **attr, ) - def __new__(cls, incoming_graph_data=None, **attr) -> Graph: + +class CudaGraph: + # Tell networkx to dispatch calls with this object to nx-cugraph + __networkx_backend__: ClassVar[str] = "cugraph" # nx >=3.2 + __networkx_plugin__: ClassVar[str] = "cugraph" # nx <3.2 + + # Allow networkx dispatch machinery to cache conversions. + # This means we should clear the cache if we ever mutate the object! + __networkx_cache__: dict | None + + # networkx properties + graph: dict + graph_attr_dict_factory: ClassVar[type] = dict + + # Not networkx properties + # We store edge data in COO format with {src,dst}_indices and edge_values. 
+ src_indices: cp.ndarray[IndexValue] + dst_indices: cp.ndarray[IndexValue] + edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] + edge_masks: dict[AttrKey, cp.ndarray[bool]] + node_values: dict[AttrKey, any_ndarray[NodeValue]] + node_masks: dict[AttrKey, any_ndarray[bool]] + key_to_id: dict[NodeKey, IndexValue] | None + _id_to_key: list[NodeKey] | None + _N: int + _node_ids: cp.ndarray[IndexValue] | None # holds plc.SGGraph.vertices_array data + + # Used by graph._get_plc_graph + _plc_type_map: ClassVar[dict[np.dtype, np.dtype]] = { + # signed int + np.dtype(np.int8): np.dtype(np.float32), + np.dtype(np.int16): np.dtype(np.float32), + np.dtype(np.int32): np.dtype(np.float64), + np.dtype(np.int64): np.dtype(np.float64), # raise if abs(x) > 2**53 + # unsigned int + np.dtype(np.uint8): np.dtype(np.float32), + np.dtype(np.uint16): np.dtype(np.float32), + np.dtype(np.uint32): np.dtype(np.float64), + np.dtype(np.uint64): np.dtype(np.float64), # raise if x > 2**53 + # other + np.dtype(np.bool_): np.dtype(np.float32), + np.dtype(np.float16): np.dtype(np.float32), + } + _plc_allowed_edge_types: ClassVar[set[np.dtype]] = { + np.dtype(np.float32), + np.dtype(np.float64), + } + + #################### + # Creation methods # + #################### + + from_coo = classmethod(Graph.from_coo.__func__) + from_csr = classmethod(Graph.from_csr.__func__) + from_csc = classmethod(Graph.from_csc.__func__) + from_dcsr = classmethod(Graph.from_dcsr.__func__) + from_dcsc = classmethod(Graph.from_dcsc.__func__) + + def __new__(cls, incoming_graph_data=None, **attr) -> CudaGraph: if incoming_graph_data is None: new_graph = cls.from_coo( - 0, cp.empty(0, index_dtype), cp.empty(0, index_dtype) + 0, + cp.empty(0, index_dtype), + cp.empty(0, index_dtype), + use_compat_graph=False, ) elif incoming_graph_data.__class__ is cls: new_graph = incoming_graph_data.copy() @@ -318,34 +583,30 @@ def __new__(cls, incoming_graph_data=None, **attr) -> Graph: else: raise NotImplementedError new_graph.graph.update(attr) + # We could return Graph here (if configured), but let's not for now return new_graph ################# # Class methods # ################# - @classmethod - @networkx_api - def is_directed(cls) -> bool: - return False + is_directed = classmethod(Graph.is_directed.__func__) + is_multigraph = classmethod(Graph.is_multigraph.__func__) + to_cudagraph_class = classmethod(Graph.to_cudagraph_class.__func__) + to_networkx_class = classmethod(Graph.to_networkx_class.__func__) @classmethod @networkx_api - def is_multigraph(cls) -> bool: - return False + def to_directed_class(cls) -> type[nxcg.CudaDiGraph]: + return nxcg.CudaDiGraph @classmethod @networkx_api - def to_directed_class(cls) -> type[nxcg.DiGraph]: - return nxcg.DiGraph - - @classmethod - def to_networkx_class(cls) -> type[nx.Graph]: - return nx.Graph + def to_undirected_class(cls) -> type[CudaGraph]: + return CudaGraph @classmethod - @networkx_api - def to_undirected_class(cls) -> type[Graph]: + def _to_compat_graph_class(cls) -> type[Graph]: return Graph ############## @@ -438,7 +699,7 @@ def clear_edges(self) -> None: cache.clear() @networkx_api - def copy(self, as_view: bool = False) -> Graph: + def copy(self, as_view: bool = False) -> CudaGraph: # Does shallow copy in networkx return self._copy(as_view, self.__class__) @@ -534,14 +795,19 @@ def size(self, weight: AttrKey | None = None) -> int: return int(cp.count_nonzero(self.src_indices <= self.dst_indices)) @networkx_api - def to_directed(self, as_view: bool = False) -> nxcg.DiGraph: + def to_directed(self, 
as_view: bool = False) -> nxcg.CudaDiGraph: return self._copy(as_view, self.to_directed_class()) @networkx_api - def to_undirected(self, as_view: bool = False) -> Graph: + def to_undirected(self, as_view: bool = False) -> CudaGraph: # Does deep copy in networkx return self._copy(as_view, self.to_undirected_class()) + def _to_compat_graph(self) -> Graph: + rv = self._to_compat_graph_class()() + rv._cudagraph = self + return rv + # Not implemented... # adj, adjacency, add_edge, add_edges_from, add_node, # add_nodes_from, add_weighted_edges_from, degree, @@ -552,8 +818,8 @@ def to_undirected(self, as_view: bool = False) -> Graph: # Private methods # ################### - def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): - # DRY warning: see also MultiGraph._copy + def _copy(self, as_view: bool, cls: type[CudaGraph], reverse: bool = False): + # DRY warning: see also CudaMultiGraph._copy src_indices = self.src_indices dst_indices = self.dst_indices edge_values = self.edge_values @@ -593,6 +859,7 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=False, ) if as_view: rv.graph = self.graph @@ -689,6 +956,14 @@ def _get_plc_graph( src_indices = src_indices.astype(index_dtype) dst_indices = dst_indices.astype(index_dtype) + # This sets drop_multi_edges=True for non-multigraph input, which means + # the data in self.src_indices and self.dst_indices may not be + # identical to that contained in the returned plc.SGGraph (the returned + # SGGraph may have fewer edges since duplicates are dropped). Ideally + # self.src_indices and self.dst_indices would be updated to have + # duplicate edges removed for non-multigraph instances, but that + # requires additional code which would be redundant and likely not as + # performant as the code in PLC. return plc.SGGraph( resource_handle=plc.ResourceHandle(), graph_properties=plc.GraphProperties( @@ -702,10 +977,11 @@ def _get_plc_graph( renumber=False, do_expensive_check=False, vertices_array=self._node_ids, + drop_multi_edges=not self.is_multigraph(), ) def _sort_edge_indices(self, primary="src"): - # DRY warning: see also MultiGraph._sort_edge_indices + # DRY warning: see also CudaMultiGraph._sort_edge_indices if primary == "src": stacked = cp.vstack((self.dst_indices, self.src_indices)) elif primary == "dst": @@ -727,7 +1003,7 @@ def _sort_edge_indices(self, primary="src"): {key: val[indices] for key, val in self.edge_masks.items()} ) - def _become(self, other: Graph): + def _become(self, other: CudaGraph): if self.__class__ is not other.__class__: raise TypeError( "Attempting to update graph inplace with graph of different type!"
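The `drop_multi_edges` comment above has an observable consequence that is easy to miss: the COO arrays and the PLC graph may disagree on edge count. A hedged sketch, using only constructors from this diff (the deduplication itself happens inside PLC when `_get_plc_graph` runs):

```python
import cupy as cp
import nx_cugraph as nxcg
from nx_cugraph.utils import index_dtype

# A non-multigraph whose 0-1 edge is stored twice (in both directions,
# as undirected graphs conventionally are here).
G = nxcg.CudaGraph.from_coo(
    2,
    cp.array([0, 0, 1, 1], dtype=index_dtype),
    cp.array([1, 1, 0, 0], dtype=index_dtype),
    use_compat_graph=False,
)
assert not G.is_multigraph()
# The duplicates stay in the COO arrays; only the plc.SGGraph built by
# G._get_plc_graph() is expected to drop them (drop_multi_edges=True).
assert int(G.src_indices.size) == 4
```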
diff --git a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py index 2e7a55a9eb1..5a6595567d2 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py @@ -16,24 +16,51 @@ import nx_cugraph as nxcg -from .digraph import DiGraph -from .multigraph import MultiGraph +from .digraph import CudaDiGraph, DiGraph +from .graph import Graph +from .multigraph import CudaMultiGraph, MultiGraph -__all__ = ["MultiDiGraph"] +__all__ = ["CudaMultiDiGraph", "MultiDiGraph"] networkx_api = nxcg.utils.decorators.networkx_class(nx.MultiDiGraph) -class MultiDiGraph(MultiGraph, DiGraph): +class MultiDiGraph(nx.MultiDiGraph, MultiGraph, DiGraph): + name = Graph.name + _node = Graph._node + _adj = DiGraph._adj + _succ = DiGraph._succ + _pred = DiGraph._pred + @classmethod @networkx_api def is_directed(cls) -> bool: return True + @classmethod + @networkx_api + def is_multigraph(cls) -> bool: + return True + + @classmethod + def to_cudagraph_class(cls) -> type[CudaMultiDiGraph]: + return CudaMultiDiGraph + @classmethod def to_networkx_class(cls) -> type[nx.MultiDiGraph]: return nx.MultiDiGraph + +class CudaMultiDiGraph(CudaMultiGraph, CudaDiGraph): + is_directed = classmethod(MultiDiGraph.is_directed.__func__) + is_multigraph = classmethod(MultiDiGraph.is_multigraph.__func__) + to_cudagraph_class = classmethod(MultiDiGraph.to_cudagraph_class.__func__) + to_networkx_class = classmethod(MultiDiGraph.to_networkx_class.__func__) + + @classmethod + def _to_compat_graph_class(cls) -> type[MultiDiGraph]: + return MultiDiGraph + ########################## # NetworkX graph methods # ########################## diff --git a/python/nx-cugraph/nx_cugraph/classes/multigraph.py b/python/nx-cugraph/nx_cugraph/classes/multigraph.py index 23d9faa8734..c8c8f1dfb00 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multigraph.py @@ -22,7 +22,7 @@ import nx_cugraph as nxcg from ..utils import index_dtype -from .graph import Graph +from .graph import CudaGraph, Graph, _GraphCache if TYPE_CHECKING: from nx_cugraph.typing import ( @@ -34,32 +34,47 @@ NodeValue, any_ndarray, ) -__all__ = ["MultiGraph"] +__all__ = ["MultiGraph", "CudaMultiGraph"] networkx_api = nxcg.utils.decorators.networkx_class(nx.MultiGraph) -class MultiGraph(Graph): - # networkx properties - edge_key_dict_factory: ClassVar[type] = dict +class MultiGraph(nx.MultiGraph, Graph): + name = Graph.name + _node = Graph._node + _adj = Graph._adj - # Not networkx properties + @classmethod + @networkx_api + def is_directed(cls) -> bool: + return False - # In a MultiGraph, each edge has a unique `(src, dst, key)` key. - # By default, `key` is 0 if possible, else 1, else 2, etc. - # This key can be any hashable Python object in NetworkX. - # We don't use a dict for our data structure here, because - # that would require a `(src, dst, key)` key. - # Instead, we keep `edge_keys` and/or `edge_indices`. - # `edge_keys` is the list of Python objects for each edge. - # `edge_indices` is for the common case of default multiedge keys, - # in which case we can store it as a cupy array. - # `edge_indices` is generally preferred. It is possible to provide - # both where edge_indices is the default and edge_keys is anything. - # It is also possible for them both to be None, which means the - # default edge indices has not yet been calculated. 
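The multi-edge representation described in the comment block above can be exercised directly with `from_coo`; a hedged sketch using only names from this diff:

```python
import cupy as cp
import nx_cugraph as nxcg
from nx_cugraph.utils import index_dtype

# Two parallel 0-1 edges (stored in both directions), distinguished by
# the default integer keys 0 and 1 in the compact edge_indices array.
G = nxcg.CudaMultiGraph.from_coo(
    2,
    cp.array([0, 0, 1, 1], dtype=index_dtype),
    cp.array([1, 1, 0, 0], dtype=index_dtype),
    edge_indices=cp.array([0, 1, 0, 1], dtype=index_dtype),
    use_compat_graph=False,
)
assert G.edge_indices is not None  # compact cupy representation
assert G.edge_keys is None         # no arbitrary hashable keys needed
```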
- edge_indices: cp.ndarray[IndexValue] | None - edge_keys: list[EdgeKey] | None + @classmethod + @networkx_api + def is_multigraph(cls) -> bool: + return True + + @classmethod + def to_cudagraph_class(cls) -> type[CudaMultiGraph]: + return CudaMultiGraph + + @classmethod + @networkx_api + def to_directed_class(cls) -> type[nxcg.MultiDiGraph]: + return nxcg.MultiDiGraph + + @classmethod + def to_networkx_class(cls) -> type[nx.MultiGraph]: + return nx.MultiGraph + + @classmethod + @networkx_api + def to_undirected_class(cls) -> type[MultiGraph]: + return MultiGraph + + def __init__(self, incoming_graph_data=None, multigraph_input=None, **attr): + super().__init__(incoming_graph_data, multigraph_input, **attr) + self.__networkx_cache__ = _GraphCache(self) #################### # Creation methods # @@ -80,9 +95,10 @@ def from_coo( key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, edge_keys: list[EdgeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> MultiGraph: - new_graph = super().from_coo( + ) -> MultiGraph | CudaMultiGraph: + new_graph = super(cls.to_undirected_class(), cls).from_coo( N, src_indices, dst_indices, @@ -92,6 +108,7 @@ def from_coo( node_masks, key_to_id=key_to_id, id_to_key=id_to_key, + use_compat_graph=False, **attr, ) new_graph.edge_indices = edge_indices @@ -102,6 +119,8 @@ def from_coo( and len(new_graph.edge_keys) != src_indices.size ): raise ValueError + if use_compat_graph or use_compat_graph is None and issubclass(cls, Graph): + new_graph = new_graph._to_compat_graph() return new_graph @classmethod @@ -118,8 +137,9 @@ def from_csr( key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, edge_keys: list[EdgeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> MultiGraph: + ) -> MultiGraph | CudaMultiGraph: N = indptr.size - 1 src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead @@ -137,6 +157,7 @@ def from_csr( key_to_id=key_to_id, id_to_key=id_to_key, edge_keys=edge_keys, + use_compat_graph=use_compat_graph, **attr, ) @@ -154,8 +175,9 @@ def from_csc( key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, edge_keys: list[EdgeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> MultiGraph: + ) -> MultiGraph | CudaMultiGraph: N = indptr.size - 1 dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead @@ -173,6 +195,7 @@ def from_csc( key_to_id=key_to_id, id_to_key=id_to_key, edge_keys=edge_keys, + use_compat_graph=use_compat_graph, **attr, ) @@ -192,8 +215,9 @@ def from_dcsr( key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, edge_keys: list[EdgeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> MultiGraph: + ) -> MultiGraph | CudaMultiGraph: src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_srcs.get(), cp.diff(indptr).get()) @@ -210,6 +234,7 @@ def from_dcsr( key_to_id=key_to_id, id_to_key=id_to_key, edge_keys=edge_keys, + use_compat_graph=use_compat_graph, **attr, ) @@ -229,8 +254,9 @@ def from_dcsc( key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, edge_keys: list[EdgeKey] | None = None, + use_compat_graph: bool | None = None, **attr, - ) -> Graph: + ) -> MultiGraph | CudaGraph: dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_dsts.get(), 
cp.diff(indptr).get()) @@ -247,12 +273,46 @@ def from_dcsc( key_to_id=key_to_id, id_to_key=id_to_key, edge_keys=edge_keys, + use_compat_graph=use_compat_graph, **attr, ) + +class CudaMultiGraph(CudaGraph): + # networkx properties + edge_key_dict_factory: ClassVar[type] = dict + + # Not networkx properties + + # In a MultiGraph, each edge has a unique `(src, dst, key)` key. + # By default, `key` is 0 if possible, else 1, else 2, etc. + # This key can be any hashable Python object in NetworkX. + # We don't use a dict for our data structure here, because + # that would require a `(src, dst, key)` key. + # Instead, we keep `edge_keys` and/or `edge_indices`. + # `edge_keys` is the list of Python objects for each edge. + # `edge_indices` is for the common case of default multiedge keys, + # in which case we can store it as a cupy array. + # `edge_indices` is generally preferred. It is possible to provide + # both where edge_indices is the default and edge_keys is anything. + # It is also possible for them both to be None, which means the + # default edge indices has not yet been calculated. + edge_indices: cp.ndarray[IndexValue] | None + edge_keys: list[EdgeKey] | None + + #################### + # Creation methods # + #################### + + from_coo = classmethod(MultiGraph.from_coo.__func__) + from_csr = classmethod(MultiGraph.from_csr.__func__) + from_csc = classmethod(MultiGraph.from_csc.__func__) + from_dcsr = classmethod(MultiGraph.from_dcsr.__func__) + from_dcsc = classmethod(MultiGraph.from_dcsc.__func__) + def __new__( cls, incoming_graph_data=None, multigraph_input=None, **attr - ) -> MultiGraph: + ) -> CudaMultiGraph: if isinstance(incoming_graph_data, dict) and multigraph_input is not False: new_graph = nxcg.from_networkx( nx.MultiGraph(incoming_graph_data, multigraph_input=multigraph_input), @@ -267,28 +327,23 @@ def __new__( # Class methods # ################# - @classmethod - @networkx_api - def is_directed(cls) -> bool: - return False + is_directed = classmethod(MultiGraph.is_directed.__func__) + is_multigraph = classmethod(MultiGraph.is_multigraph.__func__) + to_cudagraph_class = classmethod(MultiGraph.to_cudagraph_class.__func__) + to_networkx_class = classmethod(MultiGraph.to_networkx_class.__func__) @classmethod @networkx_api - def is_multigraph(cls) -> bool: - return True + def to_directed_class(cls) -> type[nxcg.CudaMultiDiGraph]: + return nxcg.CudaMultiDiGraph @classmethod @networkx_api - def to_directed_class(cls) -> type[nxcg.MultiDiGraph]: - return nxcg.MultiDiGraph - - @classmethod - def to_networkx_class(cls) -> type[nx.MultiGraph]: - return nx.MultiGraph + def to_undirected_class(cls) -> type[CudaMultiGraph]: + return CudaMultiGraph @classmethod - @networkx_api - def to_undirected_class(cls) -> type[MultiGraph]: + def _to_compat_graph_class(cls) -> type[MultiGraph]: return MultiGraph ########################## @@ -308,7 +363,7 @@ def clear_edges(self) -> None: self.edge_keys = None @networkx_api - def copy(self, as_view: bool = False) -> MultiGraph: + def copy(self, as_view: bool = False) -> CudaMultiGraph: # Does shallow copy in networkx return self._copy(as_view, self.__class__) @@ -391,11 +446,11 @@ def has_edge(self, u: NodeKey, v: NodeKey, key: EdgeKey | None = None) -> bool: return any(edge_keys[i] == key for i in indices.tolist()) @networkx_api - def to_directed(self, as_view: bool = False) -> nxcg.MultiDiGraph: + def to_directed(self, as_view: bool = False) -> nxcg.CudaMultiDiGraph: return self._copy(as_view, self.to_directed_class()) @networkx_api - def 
to_undirected(self, as_view: bool = False) -> MultiGraph: + def to_undirected(self, as_view: bool = False) -> CudaMultiGraph: # Does deep copy in networkx return self._copy(as_view, self.to_undirected_class()) @@ -403,8 +458,8 @@ def to_undirected(self, as_view: bool = False) -> MultiGraph: # Private methods # ################### - def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): - # DRY warning: see also Graph._copy + def _copy(self, as_view: bool, cls: type[CudaGraph], reverse: bool = False): + # DRY warning: see also CudaGraph._copy src_indices = self.src_indices dst_indices = self.dst_indices edge_indices = self.edge_indices @@ -451,6 +506,7 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): key_to_id=key_to_id, id_to_key=id_to_key, edge_keys=edge_keys, + use_compat_graph=False, ) if as_view: rv.graph = self.graph @@ -460,7 +516,7 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): return rv def _sort_edge_indices(self, primary="src"): - # DRY warning: see also Graph._sort_edge_indices + # DRY warning: see also CudaGraph._sort_edge_indices if self.edge_indices is None and self.edge_keys is None: return super()._sort_edge_indices(primary=primary) if primary == "src": diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index 56d16d837d7..a872f13ac70 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -12,6 +12,7 @@ # limitations under the License. from __future__ import annotations +import functools import itertools import operator as op from collections import Counter, defaultdict @@ -23,9 +24,13 @@ import numpy as np import nx_cugraph as nxcg +from nx_cugraph import _nxver from .utils import index_dtype, networkx_algorithm -from .utils.misc import pairwise +from .utils.misc import _And_NotImplementedError, pairwise + +if _nxver >= (3, 4): + from networkx.utils.backends import _get_cache_key, _get_from_cache, _set_to_cache if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue, any_ndarray @@ -60,6 +65,27 @@ def _iterate_values(graph, adj, is_dicts, func): return func(it), False +# Consider adding this to `utils` if it is useful elsewhere +def _fallback_decorator(func): + """Catch and convert exceptions to ``NotImplementedError``; use as a decorator. + + ``nx.NetworkXError`` is raised without being converted. This allows + falling back to other backends if, for example, conversion to GPU fails. + """ + + @functools.wraps(func) + def inner(*args, **kwargs): + try: + return func(*args, **kwargs) + except nx.NetworkXError: + raise + except Exception as exc: + raise _And_NotImplementedError(exc) from exc + + return inner + + +@_fallback_decorator def from_networkx( graph: nx.Graph, edge_attrs: AttrKey | dict[AttrKey, EdgeValue | None] | None = None, @@ -74,7 +100,8 @@ def from_networkx( as_directed: bool = False, name: str | None = None, graph_name: str | None = None, -) -> nxcg.Graph: + use_compat_graph: bool | None = False, +) -> nxcg.Graph | nxcg.CudaGraph: """Convert a networkx graph to nx_cugraph graph; can convert all attributes. Parameters ---------- @@ -114,10 +141,16 @@ The name of the algorithm when dispatched from networkx. graph_name : str, optional The name of the graph argument being converted when dispatched from networkx. + use_compat_graph : bool or None, default False + Indicate whether to return a graph that is compatible with a NetworkX graph.
+ For example, ``nx_cugraph.Graph`` can be used as a NetworkX graph and can + reside in host (CPU) or device (GPU) memory. The default is False, which + will return e.g. ``nx_cugraph.CudaGraph`` that only resides on device (GPU) + and is not fully compatible as a NetworkX graph. Returns ------- - nx_cugraph.Graph + nx_cugraph.Graph or nx_cugraph.CudaGraph Notes ----- @@ -145,6 +178,41 @@ def from_networkx( graph = G else: raise TypeError(f"Expected networkx.Graph; got {type(graph)}") + elif isinstance(graph, nxcg.Graph): + if ( + use_compat_graph + # Use compat graphs by default + or use_compat_graph is None + and (_nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs) + ): + return graph + if graph._is_on_gpu: + return graph._cudagraph + if not graph._is_on_cpu: + raise RuntimeError( + f"{type(graph).__name__} cannot be converted to the GPU, because it is " + "not on the CPU! This is not supposed to be possible. If you believe " + "you have found a bug, please report a minimum reproducible example to " + "https://github.com/rapidsai/cugraph/issues/new/choose" + ) + if _nxver >= (3, 4): + cache_key = _get_cache_key( + edge_attrs=edge_attrs, + node_attrs=node_attrs, + preserve_edge_attrs=preserve_edge_attrs, + preserve_node_attrs=preserve_node_attrs, + preserve_graph_attrs=preserve_graph_attrs, + ) + cache = getattr(graph, "__networkx_cache__", None) + if cache is not None: + cache = cache.setdefault("backends", {}).setdefault("cugraph", {}) + compat_key, rv = _get_from_cache(cache, cache_key) + if rv is not None: + if isinstance(rv, nxcg.Graph): + # This shouldn't happen during normal use, but be extra-careful + rv = rv._cudagraph + if rv is not None: + return rv if preserve_all_attrs: preserve_edge_attrs = True @@ -165,7 +233,12 @@ def from_networkx( else: node_attrs = {node_attrs: None} - if graph.__class__ in {nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph}: + if graph.__class__ in { + nx.Graph, + nx.DiGraph, + nx.MultiGraph, + nx.MultiDiGraph, + } or isinstance(graph, nxcg.Graph): # This is a NetworkX private attribute, but is much faster to use adj = graph._adj else: @@ -455,9 +528,9 @@ def func(it, edge_attr=edge_attr, dtype=dtype): # if vals.ndim > 1: ... if graph.is_multigraph(): if graph.is_directed() or as_directed: - klass = nxcg.MultiDiGraph + klass = nxcg.CudaMultiDiGraph else: - klass = nxcg.MultiGraph + klass = nxcg.CudaMultiGraph rv = klass.from_coo( N, src_indices, @@ -469,12 +542,13 @@ def func(it, edge_attr=edge_attr, dtype=dtype): node_masks, key_to_id=key_to_id, edge_keys=edge_keys, + use_compat_graph=False, ) else: if graph.is_directed() or as_directed: - klass = nxcg.DiGraph + klass = nxcg.CudaDiGraph else: - klass = nxcg.Graph + klass = nxcg.CudaGraph rv = klass.from_coo( N, src_indices, @@ -484,9 +558,22 @@ def func(it, edge_attr=edge_attr, dtype=dtype): node_values, node_masks, key_to_id=key_to_id, + use_compat_graph=False, ) if preserve_graph_attrs: rv.graph.update(graph.graph) # deepcopy? + if _nxver >= (3, 4) and isinstance(graph, nxcg.Graph) and cache is not None: + # Make sure this conversion is added to the cache, and make all of + # our graphs share the same `.graph` attribute for consistency. 
+ rv.graph = graph.graph + _set_to_cache(cache, cache_key, rv) + if ( + use_compat_graph + # Use compat graphs by default + or use_compat_graph is None + and (_nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs) + ): + return rv._to_compat_graph() return rv @@ -535,14 +622,16 @@ def _iter_attr_dicts( return full_dicts -def to_networkx(G: nxcg.Graph, *, sort_edges: bool = False) -> nx.Graph: +def to_networkx( + G: nxcg.Graph | nxcg.CudaGraph, *, sort_edges: bool = False +) -> nx.Graph: """Convert a nx_cugraph graph to networkx graph. All edge and node attributes and ``G.graph`` properties are converted. Parameters ---------- - G : nx_cugraph.Graph + G : nx_cugraph.Graph or nx_cugraph.CudaGraph sort_edges : bool, default False Whether to sort the edge data of the input graph by (src, dst) indices before converting. This can be useful to convert to networkx graphs @@ -557,6 +646,9 @@ def to_networkx(G: nxcg.Graph, *, sort_edges: bool = False) -> nx.Graph: -------- from_networkx : The opposite; convert networkx graph to nx_cugraph graph """ + if isinstance(G, nxcg.Graph): + # These graphs are already NetworkX graphs :) + return G rv = G.to_networkx_class()() id_to_key = G.id_to_key if sort_edges: @@ -623,13 +715,13 @@ def _to_graph( edge_attr: AttrKey | None = None, edge_default: EdgeValue | None = 1, edge_dtype: Dtype | None = None, -) -> nxcg.Graph | nxcg.DiGraph: +) -> nxcg.CudaGraph | nxcg.CudaDiGraph: """Ensure that input type is a nx_cugraph graph, and convert if necessary. Directed and undirected graphs are both allowed. This is an internal utility function and may change or be removed. """ - if isinstance(G, nxcg.Graph): + if isinstance(G, nxcg.CudaGraph): return G if isinstance(G, nx.Graph): return from_networkx( @@ -644,15 +736,15 @@ def _to_directed_graph( edge_attr: AttrKey | None = None, edge_default: EdgeValue | None = 1, edge_dtype: Dtype | None = None, -) -> nxcg.DiGraph: - """Ensure that input type is a nx_cugraph DiGraph, and convert if necessary. +) -> nxcg.CudaDiGraph: + """Ensure that input type is a nx_cugraph CudaDiGraph, and convert if necessary. Undirected graphs will be converted to directed. This is an internal utility function and may change or be removed. """ - if isinstance(G, nxcg.DiGraph): + if isinstance(G, nxcg.CudaDiGraph): return G - if isinstance(G, nxcg.Graph): + if isinstance(G, nxcg.CudaGraph): return G.to_directed() if isinstance(G, nx.Graph): return from_networkx( @@ -670,13 +762,13 @@ def _to_undirected_graph( edge_attr: AttrKey | None = None, edge_default: EdgeValue | None = 1, edge_dtype: Dtype | None = None, -) -> nxcg.Graph: - """Ensure that input type is a nx_cugraph Graph, and convert if necessary. +) -> nxcg.CudaGraph: + """Ensure that input type is a nx_cugraph CudaGraph, and convert if necessary. Only undirected graphs are allowed. Directed graphs will raise ValueError. This is an internal utility function and may change or be removed. 
""" - if isinstance(G, nxcg.Graph): + if isinstance(G, nxcg.CudaGraph): if G.is_directed(): raise ValueError("Only undirected graphs supported; got a directed graph") return G @@ -688,7 +780,7 @@ def _to_undirected_graph( raise TypeError -@networkx_algorithm(version_added="24.08") +@networkx_algorithm(version_added="24.08", fallback=True) def from_dict_of_lists(d, create_using=None): from .generators._utils import _create_using_class diff --git a/python/nx-cugraph/nx_cugraph/convert_matrix.py b/python/nx-cugraph/nx_cugraph/convert_matrix.py index 38139b913cf..54975902861 100644 --- a/python/nx-cugraph/nx_cugraph/convert_matrix.py +++ b/python/nx-cugraph/nx_cugraph/convert_matrix.py @@ -14,6 +14,8 @@ import networkx as nx import numpy as np +from nx_cugraph import _nxver + from .generators._utils import _create_using_class from .utils import _cp_iscopied_asarray, index_dtype, networkx_algorithm @@ -24,7 +26,7 @@ # Value columns with string dtype is not supported -@networkx_algorithm(is_incomplete=True, version_added="23.12") +@networkx_algorithm(is_incomplete=True, version_added="23.12", fallback=True) def from_pandas_edgelist( df, source="source", @@ -138,7 +140,7 @@ def from_pandas_edgelist( and ( # In nx <= 3.3, `edge_key` was ignored if `edge_attr` is None edge_attr is not None - or nx.__version__[:3] > "3.3" + or _nxver > (3, 3) ) ): try: @@ -161,7 +163,7 @@ def from_pandas_edgelist( return G -@networkx_algorithm(version_added="23.12") +@networkx_algorithm(version_added="23.12", fallback=True) def from_scipy_sparse_array( A, parallel_edges=False, create_using=None, edge_attribute="weight" ): diff --git a/python/nx-cugraph/nx_cugraph/generators/_utils.py b/python/nx-cugraph/nx_cugraph/generators/_utils.py index e38ace5b28d..bc9ab84bdad 100644 --- a/python/nx-cugraph/nx_cugraph/generators/_utils.py +++ b/python/nx-cugraph/nx_cugraph/generators/_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,6 +16,7 @@ import networkx as nx import nx_cugraph as nxcg +from nx_cugraph import _nxver from ..utils import index_dtype @@ -74,7 +75,7 @@ def _common_small_graph(n, nodes, create_using, *, allow_directed=True): return G -def _create_using_class(create_using, *, default=nxcg.Graph): +def _create_using_class(create_using, *, default=nx.Graph): """Handle ``create_using`` argument and return a Graph type from nx_cugraph.""" inplace = False if create_using is None: @@ -85,16 +86,17 @@ def _create_using_class(create_using, *, default=nxcg.Graph): create_using, "is_multigraph" ): raise TypeError("create_using is not a valid graph type or instance") - elif not isinstance(create_using, nxcg.Graph): + elif not isinstance(create_using, (nxcg.Graph, nxcg.CudaGraph)): raise NotImplementedError( f"create_using with object of type {type(create_using)} is not supported " - "by the cugraph backend; only nx_cugraph.Graph objects are allowed." + "by the cugraph backend; only nx_cugraph.Graph or nx_cugraph.CudaGraph " + "objects are allowed." 
) else: inplace = True G = create_using G.clear() - if not isinstance(G, nxcg.Graph): + if not isinstance(G, (nxcg.Graph, nxcg.CudaGraph)): if G.is_multigraph(): if G.is_directed(): graph_class = nxcg.MultiDiGraph @@ -104,10 +106,12 @@ def _create_using_class(create_using, *, default=nxcg.Graph): graph_class = nxcg.DiGraph else: graph_class = nxcg.Graph + if _nxver >= (3, 3) and not nx.config.backends.cugraph.use_compat_graphs: + graph_class = graph_class.to_cudagraph_class() if G.__class__ not in {nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph}: raise NotImplementedError( f"create_using with type {type(G)} is not supported by the cugraph " - "backend; only standard networkx or nx_cugraph Graph objects are " + "backend; only standard networkx or nx_cugraph graph objects are " "allowed (but not customized subclasses derived from them)." ) else: diff --git a/python/nx-cugraph/nx_cugraph/generators/classic.py b/python/nx-cugraph/nx_cugraph/generators/classic.py index a548beea34f..cfcb2a3afec 100644 --- a/python/nx-cugraph/nx_cugraph/generators/classic.py +++ b/python/nx-cugraph/nx_cugraph/generators/classic.py @@ -18,6 +18,7 @@ import numpy as np import nx_cugraph as nxcg +from nx_cugraph import _nxver from ..utils import _get_int_dtype, index_dtype, networkx_algorithm from ._utils import ( @@ -102,7 +103,9 @@ def complete_graph(n, create_using=None): @networkx_algorithm(version_added="23.12") def complete_multipartite_graph(*subset_sizes): if not subset_sizes: - return nxcg.Graph() + if _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs: + return nxcg.Graph() + return nxcg.CudaGraph() try: subset_sizes = [_ensure_int(size) for size in subset_sizes] except TypeError: @@ -139,6 +142,8 @@ def complete_multipartite_graph(*subset_sizes): dst_indices, node_values={"subset": subsets_array}, id_to_key=nodes, + use_compat_graph=_nxver < (3, 3) + or nx.config.backends.cugraph.use_compat_graphs, ) diff --git a/python/nx-cugraph/nx_cugraph/generators/community.py b/python/nx-cugraph/nx_cugraph/generators/community.py index 9b0e0848de9..4e5063cc345 100644 --- a/python/nx-cugraph/nx_cugraph/generators/community.py +++ b/python/nx-cugraph/nx_cugraph/generators/community.py @@ -11,8 +11,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import cupy as cp +import networkx as nx import nx_cugraph as nxcg +from nx_cugraph import _nxver from ..utils import networkx_algorithm from ._utils import ( @@ -42,4 +44,7 @@ def caveman_graph(l, k): # noqa: E741 dst_cliques.extend(dst_clique + i * k for i in range(1, l)) src_indices = cp.hstack(src_cliques) dst_indices = cp.hstack(dst_cliques) - return nxcg.Graph.from_coo(l * k, src_indices, dst_indices) + use_compat_graph = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + return nxcg.CudaGraph.from_coo( + l * k, src_indices, dst_indices, use_compat_graph=use_compat_graph + ) diff --git a/python/nx-cugraph/nx_cugraph/generators/ego.py b/python/nx-cugraph/nx_cugraph/generators/ego.py index 66c9c8b95ee..9a91fa0b6c3 100644 --- a/python/nx-cugraph/nx_cugraph/generators/ego.py +++ b/python/nx-cugraph/nx_cugraph/generators/ego.py @@ -32,7 +32,10 @@ def ego_graph( ): """Weighted ego_graph with negative cycles is not yet supported. 
`NotImplementedError` will be raised if there are negative `distance` edge weights.""" # noqa: E501 if isinstance(G, nx.Graph): + is_compat_graph = isinstance(G, nxcg.Graph) G = nxcg.from_networkx(G, preserve_all_attrs=True) + else: + is_compat_graph = False if n not in G: if distance is None: raise nx.NodeNotFound(f"Source {n} is not in G") @@ -100,7 +103,10 @@ def ego_graph( node_mask &= node_ids != src_index node_ids = node_ids[node_mask] if node_ids.size == G._N: - return G.copy() + rv = G.copy() + if is_compat_graph: + return rv._to_compat_graph() + return rv # TODO: create renumbering helper function(s) node_ids.sort() # TODO: is this ever necessary? Keep for safety node_values = {key: val[node_ids] for key, val in G.node_values.items()} @@ -137,6 +143,7 @@ def ego_graph( "node_values": node_values, "node_masks": node_masks, "key_to_id": key_to_id, + "use_compat_graph": False, } if G.is_multigraph(): if G.edge_keys is not None: @@ -147,6 +154,8 @@ def ego_graph( kwargs["edge_indices"] = G.edge_indices[edge_mask] rv = G.__class__.from_coo(**kwargs) rv.graph.update(G.graph) + if is_compat_graph: + return rv._to_compat_graph() return rv diff --git a/python/nx-cugraph/nx_cugraph/generators/small.py b/python/nx-cugraph/nx_cugraph/generators/small.py index 45487571cda..d0c03cb7dd4 100644 --- a/python/nx-cugraph/nx_cugraph/generators/small.py +++ b/python/nx-cugraph/nx_cugraph/generators/small.py @@ -14,6 +14,7 @@ import networkx as nx import nx_cugraph as nxcg +from nx_cugraph import _nxver from ..utils import index_dtype, networkx_algorithm from ._utils import _IS_NX32_OR_LESS, _create_using_class @@ -449,7 +450,14 @@ def pappus_graph(): index_dtype, ) # fmt: on - return nxcg.Graph.from_coo(18, src_indices, dst_indices, name="Pappus Graph") + use_compat_graph = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + return nxcg.CudaGraph.from_coo( + 18, + src_indices, + dst_indices, + name="Pappus Graph", + use_compat_graph=use_compat_graph, + ) @networkx_algorithm(version_added="23.12") diff --git a/python/nx-cugraph/nx_cugraph/generators/social.py b/python/nx-cugraph/nx_cugraph/generators/social.py index 07e82c63fbf..09d405e7561 100644 --- a/python/nx-cugraph/nx_cugraph/generators/social.py +++ b/python/nx-cugraph/nx_cugraph/generators/social.py @@ -11,9 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
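Each generator touched in this diff repeats the same gating expression before choosing its return type. A hedged sketch of that pattern applied to a made-up generator (`_tiny_path_graph` is hypothetical; everything else comes from this diff):

```python
import cupy as cp
import networkx as nx
import nx_cugraph as nxcg
from nx_cugraph import _nxver
from nx_cugraph.utils import index_dtype


def _tiny_path_graph():
    """Hypothetical generator following the pattern used in this diff."""
    # NetworkX < 3.3 has no backend config, so compat graphs are always
    # used there; otherwise honor the user's use_compat_graphs setting.
    use_compat_graph = (
        _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs
    )
    return nxcg.CudaGraph.from_coo(
        3,
        cp.array([0, 1, 1, 2], dtype=index_dtype),  # both directions
        cp.array([1, 0, 2, 1], dtype=index_dtype),
        use_compat_graph=use_compat_graph,
    )
```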
import cupy as cp +import networkx as nx import numpy as np import nx_cugraph as nxcg +from nx_cugraph import _nxver from ..utils import index_dtype, networkx_algorithm @@ -77,7 +79,8 @@ def davis_southern_women_graph(): "E13", "E14", ] # fmt: on - return nxcg.Graph.from_coo( + use_compat_graph = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + return nxcg.CudaGraph.from_coo( 32, src_indices, dst_indices, @@ -85,6 +88,7 @@ def davis_southern_women_graph(): id_to_key=women + events, top=women, bottom=events, + use_compat_graph=use_compat_graph, ) @@ -111,7 +115,14 @@ def florentine_families_graph(): "Salviati", "Strozzi", "Tornabuoni" ] # fmt: on - return nxcg.Graph.from_coo(15, src_indices, dst_indices, id_to_key=nodes) + use_compat_graph = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + return nxcg.CudaGraph.from_coo( + 15, + src_indices, + dst_indices, + id_to_key=nodes, + use_compat_graph=use_compat_graph, + ) @networkx_algorithm(version_added="23.12") @@ -165,13 +176,15 @@ def karate_club_graph(): "Officer", "Officer", "Officer", "Officer", "Officer", "Officer", ]) # fmt: on - return nxcg.Graph.from_coo( + use_compat_graph = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + return nxcg.CudaGraph.from_coo( 34, src_indices, dst_indices, edge_values={"weight": weights}, node_values={"club": clubs}, name="Zachary's Karate Club", + use_compat_graph=use_compat_graph, ) @@ -289,6 +302,12 @@ def les_miserables_graph(): "Zephine", ] # fmt: on - return nxcg.Graph.from_coo( - 77, src_indices, dst_indices, edge_values={"weight": weights}, id_to_key=nodes + use_compat_graph = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + return nxcg.CudaGraph.from_coo( + 77, + src_indices, + dst_indices, + edge_values={"weight": weights}, + id_to_key=nodes, + use_compat_graph=use_compat_graph, ) diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index 4007230efa9..1a3d08409a2 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -18,6 +18,7 @@ import networkx as nx import nx_cugraph as nxcg +from nx_cugraph import _nxver class BackendInterface: @@ -32,11 +33,19 @@ def convert_from_nx(graph, *args, edge_attrs=None, weight=None, **kwargs): "edge_attrs and weight arguments should not both be given" ) edge_attrs = {weight: 1} - return nxcg.from_networkx(graph, *args, edge_attrs=edge_attrs, **kwargs) + return nxcg.from_networkx( + graph, + *args, + edge_attrs=edge_attrs, + use_compat_graph=_nxver < (3, 3) + or nx.config.backends.cugraph.use_compat_graphs, + **kwargs, + ) @staticmethod def convert_to_nx(obj, *, name: str | None = None): - if isinstance(obj, nxcg.Graph): + if isinstance(obj, nxcg.CudaGraph): + # Observe that this does not try to convert Graph! 
return nxcg.to_networkx(obj) return obj @@ -62,19 +71,32 @@ def key(testpath): return (testname, frozenset({classname, filename})) return (testname, frozenset({filename})) + use_compat_graph = ( + _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + ) + fallback = use_compat_graph or nx.utils.backends._dispatchable._fallback_to_nx + # Reasons for xfailing + # For nx version <= 3.1 no_weights = "weighted implementation not currently supported" no_multigraph = "multigraphs not currently supported" + # For nx version <= 3.2 + nx_cugraph_in_test_setup = ( + "nx-cugraph Graph is incompatible in test setup in nx versions < 3.3" + ) + # For all versions louvain_different = "Louvain may be different due to RNG" - no_string_dtype = "string edge values not currently supported" sssp_path_different = "sssp may choose a different valid path" + tuple_elements_preferred = "elements are tuples instead of lists" + no_mixed_dtypes_for_nodes = ( + # This one is tricky b/c we don't raise; all dtypes are treated as str + "mixed dtypes (str, int, float) for single node property not supported" + ) + # These shouldn't fail if using Graph or falling back to networkx + no_string_dtype = "string edge values not currently supported" no_object_dtype_for_edges = ( "Edges don't support object dtype (lists, strings, etc.)" ) - tuple_elements_preferred = "elements are tuples instead of lists" - nx_cugraph_in_test_setup = ( - "nx-cugraph Graph is incompatible in test setup in nx versions < 3.3" - ) xfail = { # This is removed while strongly_connected_components() is not @@ -98,38 +120,6 @@ def key(testpath): "test_cycles.py:TestMinimumCycleBasis." "test_gh6787_and_edge_attribute_names" ): sssp_path_different, - key( - "test_graph_hashing.py:test_isomorphic_edge_attr" - ): no_object_dtype_for_edges, - key( - "test_graph_hashing.py:test_isomorphic_edge_attr_and_node_attr" - ): no_object_dtype_for_edges, - key( - "test_graph_hashing.py:test_isomorphic_edge_attr_subgraph_hash" - ): no_object_dtype_for_edges, - key( - "test_graph_hashing.py:" - "test_isomorphic_edge_attr_and_node_attr_subgraph_hash" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPNoEdgeTypes.test_summary_graph" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPUndirected.test_summary_graph" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPDirected.test_summary_graph" - ): no_object_dtype_for_edges, - key("test_gexf.py:TestGEXF.test_relabel"): no_object_dtype_for_edges, - key( - "test_gml.py:TestGraph.test_parse_gml_cytoscape_bug" - ): no_object_dtype_for_edges, - key("test_gml.py:TestGraph.test_parse_gml"): no_object_dtype_for_edges, - key("test_gml.py:TestGraph.test_read_gml"): no_object_dtype_for_edges, - key("test_gml.py:TestGraph.test_data_types"): no_object_dtype_for_edges, - key( - "test_gml.py:TestPropertyLists.test_reading_graph_with_list_property" - ): no_object_dtype_for_edges, key( "test_relabel.py:" "test_relabel_preserve_node_order_partial_mapping_with_copy_false" @@ -138,48 +128,107 @@ def key(testpath): "test_gml.py:" "TestPropertyLists.test_reading_graph_with_single_element_list_property" ): tuple_elements_preferred, - key( - "test_relabel.py:" - "TestRelabel.test_relabel_multidigraph_inout_merge_nodes" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multigraph_merge_inplace" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multidigraph_merge_inplace" - ): no_string_dtype, - key( - 
"test_relabel.py:TestRelabel.test_relabel_multidigraph_inout_copy" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multigraph_merge_copy" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multidigraph_merge_copy" - ): no_string_dtype, - key( - "test_relabel.py:TestRelabel.test_relabel_multigraph_nonnumeric_key" - ): no_string_dtype, - key("test_contraction.py:test_multigraph_path"): no_object_dtype_for_edges, - key( - "test_contraction.py:test_directed_multigraph_path" - ): no_object_dtype_for_edges, - key( - "test_contraction.py:test_multigraph_blockmodel" - ): no_object_dtype_for_edges, - key( - "test_summarization.py:TestSNAPUndirectedMulti.test_summary_graph" - ): no_string_dtype, - key( - "test_summarization.py:TestSNAPDirectedMulti.test_summary_graph" - ): no_string_dtype, } + if not fallback: + xfail.update( + { + key( + "test_graph_hashing.py:test_isomorphic_edge_attr" + ): no_object_dtype_for_edges, + key( + "test_graph_hashing.py:test_isomorphic_edge_attr_and_node_attr" + ): no_object_dtype_for_edges, + key( + "test_graph_hashing.py:test_isomorphic_edge_attr_subgraph_hash" + ): no_object_dtype_for_edges, + key( + "test_graph_hashing.py:" + "test_isomorphic_edge_attr_and_node_attr_subgraph_hash" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:TestSNAPNoEdgeTypes.test_summary_graph" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:TestSNAPUndirected.test_summary_graph" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:TestSNAPDirected.test_summary_graph" + ): no_object_dtype_for_edges, + key( + "test_gexf.py:TestGEXF.test_relabel" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_parse_gml_cytoscape_bug" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_parse_gml" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_read_gml" + ): no_object_dtype_for_edges, + key( + "test_gml.py:TestGraph.test_data_types" + ): no_object_dtype_for_edges, + key( + "test_gml.py:" + "TestPropertyLists.test_reading_graph_with_list_property" + ): no_object_dtype_for_edges, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_inout_merge_nodes" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multigraph_merge_inplace" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_merge_inplace" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_inout_copy" + ): no_string_dtype, + key( + "test_relabel.py:TestRelabel.test_relabel_multigraph_merge_copy" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multidigraph_merge_copy" + ): no_string_dtype, + key( + "test_relabel.py:" + "TestRelabel.test_relabel_multigraph_nonnumeric_key" + ): no_string_dtype, + key( + "test_contraction.py:test_multigraph_path" + ): no_object_dtype_for_edges, + key( + "test_contraction.py:test_directed_multigraph_path" + ): no_object_dtype_for_edges, + key( + "test_contraction.py:test_multigraph_blockmodel" + ): no_object_dtype_for_edges, + key( + "test_summarization.py:" + "TestSNAPUndirectedMulti.test_summary_graph" + ): no_string_dtype, + key( + "test_summarization.py:TestSNAPDirectedMulti.test_summary_graph" + ): no_string_dtype, + } + ) + else: + xfail.update( + { + key( + "test_gml.py:" + "TestPropertyLists.test_reading_graph_with_list_property" + ): no_mixed_dtypes_for_nodes, + } + ) - from packaging.version import parse - - nxver = 
parse(nx.__version__) - - if nxver.major == 3 and nxver.minor <= 2: + if _nxver <= (3, 2): xfail.update( { # NetworkX versions prior to 3.2.1 have tests written to @@ -216,7 +265,7 @@ def key(testpath): } ) - if nxver.major == 3 and nxver.minor <= 1: + if _nxver <= (3, 1): # MAINT: networkx 3.0, 3.1 # NetworkX 3.2 added the ability to "fallback to nx" if backend algorithms # raise NotImplementedError or `can_run` returns False. The tests below @@ -332,24 +381,25 @@ def key(testpath): xfail[key("test_louvain.py:test_threshold")] = ( "Louvain does not support seed parameter" ) - if nxver.major == 3 and nxver.minor >= 2: - xfail.update( - { - key( - "test_convert_pandas.py:TestConvertPandas." - "test_from_edgelist_multi_attr_incl_target" - ): no_string_dtype, - key( - "test_convert_pandas.py:TestConvertPandas." - "test_from_edgelist_multidigraph_and_edge_attr" - ): no_string_dtype, - key( - "test_convert_pandas.py:TestConvertPandas." - "test_from_edgelist_int_attr_name" - ): no_string_dtype, - } - ) - if nxver.minor == 2: + if _nxver >= (3, 2): + if not fallback: + xfail.update( + { + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_multi_attr_incl_target" + ): no_string_dtype, + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_multidigraph_and_edge_attr" + ): no_string_dtype, + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_int_attr_name" + ): no_string_dtype, + } + ) + if _nxver[1] == 2: different_iteration_order = "Different graph data iteration order" xfail.update( { @@ -366,7 +416,7 @@ def key(testpath): ): different_iteration_order, } ) - elif nxver.minor >= 3: + elif _nxver[1] >= 3: xfail.update( { key("test_louvain.py:test_max_level"): louvain_different, diff --git a/python/nx-cugraph/nx_cugraph/relabel.py b/python/nx-cugraph/nx_cugraph/relabel.py index 20d1337a99c..e38e18c779e 100644 --- a/python/nx-cugraph/nx_cugraph/relabel.py +++ b/python/nx-cugraph/nx_cugraph/relabel.py @@ -29,13 +29,18 @@ @networkx_algorithm(version_added="24.08") def relabel_nodes(G, mapping, copy=True): + G_orig = G if isinstance(G, nx.Graph): - if not copy: + is_compat_graph = isinstance(G, nxcg.Graph) + if not copy and not is_compat_graph: raise RuntimeError( "Using `copy=False` is invalid when using a NetworkX graph " "as input to `nx_cugraph.relabel_nodes`" ) G = nxcg.from_networkx(G, preserve_all_attrs=True) + else: + is_compat_graph = False + it = range(G._N) if G.key_to_id is None else G.id_to_key if callable(mapping): previd_to_key = [mapping(node) for node in it] @@ -225,12 +230,13 @@ def relabel_nodes(G, mapping, copy=True): node_masks=node_masks, id_to_key=newid_to_key, key_to_id=key_to_newid, + use_compat_graph=is_compat_graph, **extra_kwargs, ) rv.graph.update(G.graph) if not copy: - G._become(rv) - return G + G_orig._become(rv) + return G_orig return rv @@ -241,7 +247,10 @@ def convert_node_labels_to_integers( if ordering not in {"default", "sorted", "increasing degree", "decreasing degree"}: raise nx.NetworkXError(f"Unknown node ordering: {ordering}") if isinstance(G, nx.Graph): + is_compat_graph = isinstance(G, nxcg.Graph) G = nxcg.from_networkx(G, preserve_all_attrs=True) + else: + is_compat_graph = False G = G.copy() if label_attribute is not None: prev_vals = G.id_to_key @@ -279,4 +288,6 @@ def convert_node_labels_to_integers( key_to_id = G.key_to_id G.key_to_id = {i: key_to_id[n] for i, (d, n) in enumerate(pairs, first_label)} G._id_to_key = id_to_key + if is_compat_graph: + return G._to_compat_graph() return G diff 
--git a/python/nx-cugraph/nx_cugraph/tests/pytest.ini b/python/nx-cugraph/nx_cugraph/tests/pytest.ini new file mode 100644 index 00000000000..7b0a9f29fb1 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/pytest.ini @@ -0,0 +1,4 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native diff --git a/python/nx-cugraph/nx_cugraph/tests/test_bfs.py b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py index c2b22e98949..ad2c62c1fb9 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_bfs.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py @@ -12,11 +12,10 @@ # limitations under the License. import networkx as nx import pytest -from packaging.version import parse -nxver = parse(nx.__version__) +from nx_cugraph import _nxver -if nxver.major == 3 and nxver.minor < 2: +if _nxver < (3, 2): pytest.skip("Need NetworkX >=3.2 to test BFS", allow_module_level=True) diff --git a/python/nx-cugraph/nx_cugraph/tests/test_classes.py b/python/nx-cugraph/nx_cugraph/tests/test_classes.py new file mode 100644 index 00000000000..0ac238b3558 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_classes.py @@ -0,0 +1,77 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import nx_cugraph as nxcg + + +def test_class_to_class(): + """Basic sanity checks to ensure metadata relating the graph classes is accurate.""" + for prefix in ["", "Cuda"]: + for suffix in ["Graph", "DiGraph", "MultiGraph", "MultiDiGraph"]: + cls_name = f"{prefix}{suffix}" + cls = getattr(nxcg, cls_name) + assert cls.__name__ == cls_name + G = cls() + assert cls is G.__class__ + # cudagraph + val = cls.to_cudagraph_class() + val2 = G.to_cudagraph_class() + assert val is val2 + assert val.__name__ == f"Cuda{suffix}" + assert val.__module__.startswith("nx_cugraph") + assert cls.is_directed() == G.is_directed() == val.is_directed() + assert cls.is_multigraph() == G.is_multigraph() == val.is_multigraph() + # networkx + val = cls.to_networkx_class() + val2 = G.to_networkx_class() + assert val is val2 + assert val.__name__ == suffix + assert val.__module__.startswith("networkx") + val = val() + assert cls.is_directed() == G.is_directed() == val.is_directed() + assert cls.is_multigraph() == G.is_multigraph() == val.is_multigraph() + # directed + val = cls.to_directed_class() + val2 = G.to_directed_class() + assert val is val2 + assert val.__module__.startswith("nx_cugraph") + assert val.is_directed() + assert cls.is_multigraph() == G.is_multigraph() == val.is_multigraph() + if "Di" in suffix: + assert val is cls + else: + assert "Di" in val.__name__ + assert prefix in val.__name__ + assert cls.to_undirected_class() is cls + # undirected + val = cls.to_undirected_class() + val2 = G.to_undirected_class() + assert val is val2 + assert val.__module__.startswith("nx_cugraph") + assert not val.is_directed() + assert cls.is_multigraph() == G.is_multigraph() == val.is_multigraph() + if "Di" not in suffix: + assert val is cls + else: + assert "Di" not in val.__name__ + assert prefix in
val.__name__ + assert cls.to_directed_class() is cls + # "zero" + if prefix == "Cuda": + val = cls._to_compat_graph_class() + val2 = G._to_compat_graph_class() + assert val is val2 + assert val.__name__ == suffix + assert val.__module__.startswith("nx_cugraph") + assert val.to_cudagraph_class() is cls + assert cls.is_directed() == G.is_directed() == val.is_directed() + assert cls.is_multigraph() == G.is_multigraph() == val.is_multigraph() diff --git a/python/nx-cugraph/nx_cugraph/tests/test_cluster.py b/python/nx-cugraph/nx_cugraph/tests/test_cluster.py index ad4770f1ab8..fd8e1b3cf13 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_cluster.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_cluster.py @@ -12,11 +12,10 @@ # limitations under the License. import networkx as nx import pytest -from packaging.version import parse -nxver = parse(nx.__version__) +from nx_cugraph import _nxver -if nxver.major == 3 and nxver.minor < 2: +if _nxver < (3, 2): pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True) diff --git a/python/nx-cugraph/nx_cugraph/tests/test_convert.py b/python/nx-cugraph/nx_cugraph/tests/test_convert.py index 634b28e961c..3d109af8a74 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_convert.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_convert.py @@ -13,13 +13,10 @@ import cupy as cp import networkx as nx import pytest -from packaging.version import parse import nx_cugraph as nxcg from nx_cugraph import interface -nxver = parse(nx.__version__) - @pytest.mark.parametrize( "graph_class", [nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph] diff --git a/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py b/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py index 5474f9d79e3..0697a744e85 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_ego_graph.py @@ -12,16 +12,13 @@ # limitations under the License. 
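The test modules in this patch repeatedly replace `packaging.version.parse` with the shared `_nxver` tuple, as in the hunks above and below. A hedged sketch of that idiom follows; it assumes `_nxver` is a plain integer tuple, and the construction shown is illustrative only (the real definition lives in `nx_cugraph/__init__.py`, which is not part of this diff):

    # Sketch of the version-tuple idiom adopted across these test modules.
    # Assumption: nx_cugraph._nxver is an int tuple such as (3, 2); the
    # construction below is illustrative, not the project's actual code.
    import re

    import networkx as nx

    match = re.match(r"(\d+)\.(\d+)", nx.__version__)
    _nxver = tuple(int(part) for part in match.groups())

    if _nxver < (3, 2):
        print("skip: these tests need NetworkX >= 3.2")
    elif _nxver[:2] == (3, 2):
        print("apply nx 3.2.x-specific expectations")

Plain tuple comparisons replace the `nxver.major`/`nxver.minor` checks and drop the `packaging` test dependency, which is also removed from pyproject.toml later in this patch.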
import networkx as nx import pytest -from packaging.version import parse import nx_cugraph as nxcg +from nx_cugraph import _nxver from .testing_utils import assert_graphs_equal -nxver = parse(nx.__version__) - - -if nxver.major == 3 and nxver.minor < 2: +if _nxver < (3, 2): pytest.skip("Need NetworkX >=3.2 to test ego_graph", allow_module_level=True) @@ -49,7 +46,12 @@ def test_ego_graph_cycle_graph( kwargs = {"radius": radius, "center": center, "undirected": undirected} Hnx = nx.ego_graph(Gnx, n, **kwargs) Hcg = nx.ego_graph(Gnx, n, **kwargs, backend="cugraph") + use_compat_graphs = _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs + assert_graphs_equal(Hnx, Hcg._cudagraph if use_compat_graphs else Hcg) + Hcg = nx.ego_graph(Gcg, n, **kwargs) assert_graphs_equal(Hnx, Hcg) + Hcg = nx.ego_graph(Gcg._to_compat_graph(), n, **kwargs) + assert_graphs_equal(Hnx, Hcg._cudagraph) with pytest.raises(nx.NodeNotFound, match="not in G"): nx.ego_graph(Gnx, -1, **kwargs) with pytest.raises(nx.NodeNotFound, match="not in G"): @@ -61,20 +63,36 @@ def test_ego_graph_cycle_graph( kwargs["distance"] = "weight" H2nx = nx.ego_graph(Gnx, n, **kwargs) - is_nx32 = nxver.major == 3 and nxver.minor == 2 + is_nx32 = _nxver[:2] == (3, 2) if undirected and Gnx.is_directed() and Gnx.is_multigraph(): if is_nx32: # `should_run` was added in nx 3.3 match = "Weighted ego_graph with undirected=True not implemented" + elif _nxver >= (3, 4): + match = "not implemented by 'cugraph'" else: match = "not implemented by cugraph" - with pytest.raises(RuntimeError, match=match): + with pytest.raises( + RuntimeError if _nxver < (3, 4) else NotImplementedError, match=match + ): nx.ego_graph(Gnx, n, **kwargs, backend="cugraph") with pytest.raises(NotImplementedError, match="ego_graph"): - nx.ego_graph(Gcg, n, **kwargs) + nx.ego_graph(Gcg, n, **kwargs, backend="cugraph") + if _nxver < (3, 4): + with pytest.raises(NotImplementedError, match="ego_graph"): + nx.ego_graph(Gcg, n, **kwargs) + else: + # This is an interesting case. `nxcg.ego_graph` is not implemented for + # these arguments, so it falls back to networkx. Hence, as it is currently + # implemented, the input graph is `nxcg.CudaGraph`, but the output graph + # is `nx.Graph`. Should networkx convert back to "cugraph" backend? + # TODO: make fallback to networkx configurable. 
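The comment above describes dispatch falling back to NetworkX when nx-cugraph cannot run with the given arguments. A hedged, GPU-requiring illustration mirroring the test's arguments (NetworkX >= 3.4 assumed; exact fallback behavior is version dependent):

    # Illustration of the fallback described above; requires a GPU and
    # NetworkX >= 3.4. Weighted ego_graph with undirected=True on a directed
    # multigraph is not implemented by the cugraph backend, so dispatch
    # falls back to NetworkX: a CudaGraph goes in, a plain nx.Graph comes out.
    import networkx as nx

    import nx_cugraph as nxcg

    Gnx = nx.cycle_graph(7, create_using=nx.MultiDiGraph)
    nx.set_edge_attributes(Gnx, 1.0, "weight")
    Gcg = nxcg.from_networkx(Gnx, preserve_all_attrs=True)

    H = nx.ego_graph(Gcg, 0, radius=2, undirected=True, distance="weight")
    print(type(H))  # a NetworkX graph class under fallback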
+ H2cg = nx.ego_graph(Gcg, n, **kwargs) + assert type(H2nx) is type(H2cg) + assert_graphs_equal(H2nx, nxcg.from_networkx(H2cg, preserve_all_attrs=True)) else: H2cg = nx.ego_graph(Gnx, n, **kwargs, backend="cugraph") - assert_graphs_equal(H2nx, H2cg) + assert_graphs_equal(H2nx, H2cg._cudagraph if use_compat_graphs else H2cg) with pytest.raises(nx.NodeNotFound, match="not found in graph"): nx.ego_graph(Gnx, -1, **kwargs) with pytest.raises(nx.NodeNotFound, match="not found in graph"): diff --git a/python/nx-cugraph/nx_cugraph/tests/test_generators.py b/python/nx-cugraph/nx_cugraph/tests/test_generators.py index c751b0fe2b3..5c405f1c93b 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_generators.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_generators.py @@ -13,25 +13,24 @@ import networkx as nx import numpy as np import pytest -from packaging.version import parse import nx_cugraph as nxcg +from nx_cugraph import _nxver from .testing_utils import assert_graphs_equal -nxver = parse(nx.__version__) - - -if nxver.major == 3 and nxver.minor < 2: +if _nxver < (3, 2): pytest.skip("Need NetworkX >=3.2 to test generators", allow_module_level=True) def compare(name, create_using, *args, is_vanilla=False): exc1 = exc2 = None func = getattr(nx, name) - if isinstance(create_using, nxcg.Graph): + if isinstance(create_using, nxcg.CudaGraph): nx_create_using = nxcg.to_networkx(create_using) - elif isinstance(create_using, type) and issubclass(create_using, nxcg.Graph): + elif isinstance(create_using, type) and issubclass( + create_using, (nxcg.Graph, nxcg.CudaGraph) + ): nx_create_using = create_using.to_networkx_class() elif isinstance(create_using, nx.Graph): nx_create_using = create_using.copy() @@ -61,8 +60,27 @@ def compare(name, create_using, *args, is_vanilla=False): exc2 = exc if exc1 is not None or exc2 is not None: assert type(exc1) is type(exc2) + return + if isinstance(Gcg, nxcg.Graph): + # If the graph is empty, it may be on host, otherwise it should be on device + if len(G): + assert Gcg._is_on_gpu + assert not Gcg._is_on_cpu + assert_graphs_equal(G, Gcg._cudagraph) else: assert_graphs_equal(G, Gcg) + # Ensure the output type is correct + if is_vanilla: + if _nxver < (3, 3) or nx.config.backends.cugraph.use_compat_graphs: + assert isinstance(Gcg, nxcg.Graph) + else: + assert isinstance(Gcg, nxcg.CudaGraph) + elif isinstance(create_using, type) and issubclass( + create_using, (nxcg.Graph, nxcg.CudaGraph) + ): + assert type(Gcg) is create_using + elif isinstance(create_using, (nxcg.Graph, nxcg.CudaGraph)): + assert type(Gcg) is type(create_using) N = list(range(-1, 5)) @@ -76,6 +94,10 @@ def compare(name, create_using, *args, is_vanilla=False): nxcg.DiGraph, nxcg.MultiGraph, nxcg.MultiDiGraph, + nxcg.CudaGraph, + nxcg.CudaDiGraph, + nxcg.CudaMultiGraph, + nxcg.CudaMultiDiGraph, # These raise NotImplementedError # nx.Graph(), # nx.DiGraph(), @@ -85,6 +107,10 @@ def compare(name, create_using, *args, is_vanilla=False): nxcg.DiGraph(), nxcg.MultiGraph(), nxcg.MultiDiGraph(), + nxcg.CudaGraph(), + nxcg.CudaDiGraph(), + nxcg.CudaMultiGraph(), + nxcg.CudaMultiDiGraph(), None, object, # Bad input 7, # Bad input @@ -158,7 +184,7 @@ def compare(name, create_using, *args, is_vanilla=False): @pytest.mark.parametrize("create_using", COMPLETE_CREATE_USING) def test_generator_noarg(name, create_using): print(name, create_using, type(create_using)) - if isinstance(create_using, nxcg.Graph) and name in { + if isinstance(create_using, nxcg.CudaGraph) and name in { # fmt: off "bull_graph", "chvatal_graph", 
"cubical_graph", "diamond_graph", "house_graph", "house_x_graph", "icosahedral_graph", "krackhardt_kite_graph", diff --git a/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py b/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py index 3120995a2b2..40a361b1084 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_graph_methods.py @@ -47,7 +47,7 @@ def _create_Gs(): @pytest.mark.parametrize("Gnx", _create_Gs()) @pytest.mark.parametrize("reciprocal", [False, True]) def test_to_undirected_directed(Gnx, reciprocal): - Gcg = nxcg.DiGraph(Gnx) + Gcg = nxcg.CudaDiGraph(Gnx) assert_graphs_equal(Gnx, Gcg) Hnx1 = Gnx.to_undirected(reciprocal=reciprocal) Hcg1 = Gcg.to_undirected(reciprocal=reciprocal) @@ -62,6 +62,6 @@ def test_multidigraph_to_undirected(): Gnx.add_edge(0, 1) Gnx.add_edge(0, 1) Gnx.add_edge(1, 0) - Gcg = nxcg.MultiDiGraph(Gnx) + Gcg = nxcg.CudaMultiDiGraph(Gnx) with pytest.raises(NotImplementedError): Gcg.to_undirected() diff --git a/python/nx-cugraph/nx_cugraph/tests/test_match_api.py b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py index 176b531a6e7..1a61c69b3e7 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_match_api.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py @@ -14,13 +14,10 @@ import inspect import networkx as nx -from packaging.version import parse import nx_cugraph as nxcg from nx_cugraph.utils import networkx_algorithm -nxver = parse(nx.__version__) - def test_match_signature_and_names(): """Simple test to ensure our signatures and basic module layout match networkx.""" diff --git a/python/nx-cugraph/nx_cugraph/tests/test_multigraph.py b/python/nx-cugraph/nx_cugraph/tests/test_multigraph.py index a8f189a4745..9208eea09f2 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_multigraph.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_multigraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,7 +26,7 @@ def test_get_edge_data(test_nxcugraph): G.add_edge(0, 3) G.add_edge(0, 3) if test_nxcugraph: - G = nxcg.MultiGraph(G) + G = nxcg.CudaMultiGraph(G) default = object() assert G.get_edge_data(0, 0, default=default) is default assert G.get_edge_data("a", "b", default=default) is default @@ -60,7 +60,7 @@ def test_get_edge_data(test_nxcugraph): G = nx.MultiGraph() G.add_edge(0, 1) if test_nxcugraph: - G = nxcg.MultiGraph(G) + G = nxcg.CudaMultiGraph(G) assert G.get_edge_data(0, 1, default=default) == {0: {}} assert G.get_edge_data(0, 1, 0, default=default) == {} assert G.get_edge_data(0, 1, 1, default=default) is default diff --git a/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py b/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py new file mode 100644 index 00000000000..252f9e6bbb8 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_pagerank.py @@ -0,0 +1,40 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pandas as pd +import pytest + + +def test_pagerank_multigraph(): + """ + Ensures correct PageRank for Graphs and MultiGraphs when using from_pandas_edgelist. + + PageRank for a MultiGraph should give a different result than for a Graph; when + using a Graph, the duplicate edges should be dropped. + """ + df = pd.DataFrame( + {"source": [0, 1, 1, 1, 1, 1, 1, 2], "target": [1, 2, 2, 2, 2, 2, 2, 3]} + ) + expected_pr_for_G = nx.pagerank(nx.from_pandas_edgelist(df)) + expected_pr_for_MultiG = nx.pagerank( + nx.from_pandas_edgelist(df, create_using=nx.MultiGraph) + ) + + G = nx.from_pandas_edgelist(df, backend="cugraph") + actual_pr_for_G = nx.pagerank(G, backend="cugraph") + + MultiG = nx.from_pandas_edgelist(df, create_using=nx.MultiGraph, backend="cugraph") + actual_pr_for_MultiG = nx.pagerank(MultiG, backend="cugraph") + + assert actual_pr_for_G == pytest.approx(expected_pr_for_G) + assert actual_pr_for_MultiG == pytest.approx(expected_pr_for_MultiG) diff --git a/python/nx-cugraph/nx_cugraph/tests/testing_utils.py b/python/nx-cugraph/nx_cugraph/tests/testing_utils.py index 529a96efd81..50836acf55f 100644 --- a/python/nx-cugraph/nx_cugraph/tests/testing_utils.py +++ b/python/nx-cugraph/nx_cugraph/tests/testing_utils.py @@ -17,7 +17,7 @@ def assert_graphs_equal(Gnx, Gcg): assert isinstance(Gnx, nx.Graph) - assert isinstance(Gcg, nxcg.Graph) + assert isinstance(Gcg, nxcg.CudaGraph) assert (a := Gnx.number_of_nodes()) == (b := Gcg.number_of_nodes()), (a, b) assert (a := Gnx.number_of_edges()) == (b := Gcg.number_of_edges()), (a, b) assert (a := Gnx.is_directed()) == (b := Gcg.is_directed()), (a, b) diff --git a/python/nx-cugraph/nx_cugraph/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py index 3c5de4f2936..16486996ba0 100644 --- a/python/nx-cugraph/nx_cugraph/utils/decorators.py +++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py @@ -16,10 +16,14 @@ from textwrap import dedent import networkx as nx +from networkx import NetworkXError from networkx.utils.decorators import nodes_or_number, not_implemented_for +from nx_cugraph import _nxver from nx_cugraph.interface import BackendInterface +from .misc import _And_NotImplementedError + try: from networkx.utils.backends import _registered_algorithms except ModuleNotFoundError: @@ -44,6 +48,7 @@ class networkx_algorithm: version_added: str is_incomplete: bool is_different: bool + _fallback: bool _plc_names: set[str] | None def __new__( @@ -59,6 +64,7 @@ def __new__( version_added: str, # Required is_incomplete: bool = False, # See self.extra_doc for details if True is_different: bool = False, # See self.extra_doc for details if True + fallback: bool = False, # Change non-nx exceptions to NotImplementedError _plc: str | set[str] | None = None, # Hidden from user, may be removed someday ): if func is None: @@ -70,10 +76,11 @@ def __new__( version_added=version_added, is_incomplete=is_incomplete, is_different=is_different, + fallback=fallback, _plc=_plc, ) instance = object.__new__(cls) - if nodes_or_number is not None and nx.__version__[:3] > "3.2": + if nodes_or_number is not None and _nxver
> (3, 2): func = nx.utils.decorators.nodes_or_number(nodes_or_number)(func) # update_wrapper sets __wrapped__, which will be used for the signature update_wrapper(instance, func) @@ -100,6 +107,7 @@ def __new__( instance.version_added = version_added instance.is_incomplete = is_incomplete instance.is_different = is_different + instance.fallback = fallback # The docstring on our function is added to the NetworkX docstring. instance.extra_doc = ( dedent(func.__doc__.lstrip("\n").rstrip()) if func.__doc__ else None @@ -113,7 +121,7 @@ def __new__( # Set methods so they are in __dict__ instance._can_run = instance._can_run instance._should_run = instance._should_run - if nodes_or_number is not None and nx.__version__[:3] <= "3.2": + if nodes_or_number is not None and _nxver <= (3, 2): instance = nx.utils.decorators.nodes_or_number(nodes_or_number)(instance) return instance @@ -136,7 +144,14 @@ def _should_run(self, func): self.should_run = func def __call__(self, /, *args, **kwargs): - return self.__wrapped__(*args, **kwargs) + if not self.fallback: + return self.__wrapped__(*args, **kwargs) + try: + return self.__wrapped__(*args, **kwargs) + except NetworkXError: + raise + except Exception as exc: + raise _And_NotImplementedError(exc) from exc def __reduce__(self): return _restore_networkx_dispatched, (self.name,) diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py index 8526524f1de..01c25dd5983 100644 --- a/python/nx-cugraph/nx_cugraph/utils/misc.py +++ b/python/nx-cugraph/nx_cugraph/utils/misc.py @@ -194,7 +194,7 @@ def _get_int_dtype( def _get_float_dtype( - dtype: Dtype, *, graph: nxcg.Graph | None = None, weight: EdgeKey | None = None + dtype: Dtype, *, graph: nxcg.CudaGraph | None = None, weight: EdgeKey | None = None ): """Promote dtype to float32 or float64 as appropriate.""" if dtype is None: @@ -238,3 +238,37 @@ def _cp_iscopied_asarray(a, *args, orig_object=None, **kwargs): ): return False, arr return True, arr + + +class _And_NotImplementedError(NotImplementedError): + """Additionally make an exception a ``NotImplementedError``. + + For example: + + >>> try: + ... raise _And_NotImplementedError(KeyError("missing")) + ... except KeyError: + ... pass + + or + + >>> try: + ... raise _And_NotImplementedError(KeyError("missing")) + ... except NotImplementedError: + ... 
pass + + """ + + def __new__(cls, exc): + exc_type = type(exc) + if issubclass(exc_type, NotImplementedError): + new_type = exc_type + else: + new_type = type( + f"{exc_type.__name__}{cls.__name__}", + (exc_type, NotImplementedError), + {}, + ) + instance = NotImplementedError.__new__(new_type) + instance.__init__(*exc.args) + return instance diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index 847444f9dd1..d145aa549da 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -18,15 +18,15 @@ authors = [ { name = "NVIDIA Corporation" }, ] license = { text = "Apache 2.0" } -requires-python = ">=3.9" +requires-python = ">=3.10" classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3 :: Only", "Intended Audience :: Developers", "Topic :: Software Development :: Libraries :: Python Modules", @@ -34,13 +34,12 @@ classifiers = [ dependencies = [ "cupy-cuda11x>=12.0.0", "networkx>=3.0", - "numpy>=1.23,<2.0a0", - "pylibcugraph==24.10.*,>=0.0.0a0", + "numpy>=1.23,<3.0a0", + "pylibcugraph==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.optional-dependencies] test = [ - "packaging>=21", "pandas", "pytest", "pytest-benchmark", @@ -90,7 +89,7 @@ matrix-entry = "cuda_suffixed=true" [tool.black] line-length = 88 -target-version = ["py39", "py310", "py311"] +target-version = ["py310", "py311", "py312"] [tool.isort] sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] @@ -156,7 +155,7 @@ exclude_lines = [ [tool.ruff] # https://github.com/charliermarsh/ruff/ line-length = 88 -target-version = "py39" +target-version = "py310" [tool.ruff.lint] unfixable = [ "F841", # unused-variable (Note: can leave useless expression) @@ -170,6 +169,7 @@ external = [ ] ignore = [ # Would be nice to fix these + "B905", # `zip()` without an explicit `strict=` parameter (Note: possible since py39 was dropped; we should do this!) "D100", # Missing docstring in public module "D101", # Missing docstring in public class "D102", # Missing docstring in public method @@ -215,6 +215,7 @@ ignore = [ "SIM105", # Use contextlib.suppress(...) instead of try-except-pass (Note: try-except-pass is much faster) "SIM108", # Use ternary operator ... instead of if-else-block (Note: if-else better for coverage and sometimes clearer) "TRY003", # Avoid specifying long messages outside the exception class (Note: why?) 
+ "UP038", # Use `X | Y` in `isinstance` call instead of `(X, Y)` (Note: tuple is faster for now) # Ignored categories "C90", # mccabe (Too strict, but maybe we should make things less complex) @@ -241,6 +242,7 @@ ignore = [ # Allow assert, print, RNG, and no docstring "nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"] "_nx_cugraph/__init__.py" = ["E501"] +"nx_cugraph/__init__.py" = ["E402"] # Allow module level import not at top of file "nx_cugraph/algorithms/**/*py" = ["D205", "D401"] # Allow flexible docstrings for algorithms "nx_cugraph/generators/**/*py" = ["D205", "D401"] # Allow flexible docstrings for generators "nx_cugraph/interface.py" = ["D401"] # Flexible docstrings diff --git a/python/nx-cugraph/run_nx_tests.sh b/python/nx-cugraph/run_nx_tests.sh index bceec53b7d5..5fb173cf939 100755 --- a/python/nx-cugraph/run_nx_tests.sh +++ b/python/nx-cugraph/run_nx_tests.sh @@ -18,6 +18,10 @@ # testing takes longer. Without it, tests will xfail when encountering a # function that we don't implement. # +# NX_CUGRAPH_USE_COMPAT_GRAPHS, {"True", "False"}, default is "True" +# Whether to use `nxcg.Graph` as the nx_cugraph backend graph. +# A Graph should be a compatible NetworkX graph, so fewer tests should fail. +# # Coverage of `nx_cugraph.algorithms` is reported and is a good sanity check # that algorithms run. diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt index f43b7db1279..045628e9c0d 100644 --- a/python/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/CMakeLists.txt @@ -33,6 +33,7 @@ option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before de OFF ) option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) +option(USE_CUDA_MATH_WHEELS "Use the CUDA math wheels instead of the system libraries" OFF) if(NOT USE_CUGRAPH_OPS) message(STATUS "Disabling libcugraph functions that reference cugraph-ops") @@ -49,18 +50,38 @@ endif() include(rapids-cython-core) if (NOT cugraph_FOUND) + find_package(CUDAToolkit REQUIRED) + set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) set(CUDA_STATIC_RUNTIME ON) + set(CUDA_STATIC_MATH_LIBRARIES ON) set(USE_RAFT_STATIC ON) set(CUGRAPH_COMPILE_RAFT_LIB ON) set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) set(ALLOW_CLONE_CUGRAPH_OPS ON) + if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.0) + set(CUDA_STATIC_MATH_LIBRARIES OFF) + elseif(USE_CUDA_MATH_WHEELS) + message(FATAL_ERROR "Cannot use CUDA math wheels with CUDA < 12.0") + endif() + add_subdirectory(../../cpp cugraph-cpp EXCLUDE_FROM_ALL) + if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) + set(rpaths + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" + ) + set_property(TARGET cugraph PROPERTY INSTALL_RPATH ${rpaths} APPEND) + endif() + set(cython_lib_dir pylibcugraph) install(TARGETS cugraph DESTINATION ${cython_lib_dir}) install(TARGETS cugraph_c DESTINATION ${cython_lib_dir}) diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index 90fce23282e..9f1b9924336 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -54,6 +54,8 @@ set(cython_sources triangle_count.pyx two_hop_neighbors.pyx uniform_neighbor_sample.pyx + biased_neighbor_sample.pyx + negative_sampling.pyx uniform_random_walks.pyx 
utils.pyx weakly_connected_components.pyx diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index b67acc8bbfc..26fa3f64ddd 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -39,6 +39,10 @@ from pylibcugraph.uniform_neighbor_sample import uniform_neighbor_sample +from pylibcugraph.biased_neighbor_sample import biased_neighbor_sample + +from pylibcugraph.negative_sampling import negative_sampling + from pylibcugraph.core_number import core_number from pylibcugraph.k_core import k_core diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 4da7c4328fd..aa19ce60908 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -352,7 +352,7 @@ cdef extern from "cugraph_c/algorithms.h": # biased random walks cdef cugraph_error_code_t \ - cugraph_based_random_walks( + cugraph_biased_random_walks( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/coo.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/coo.pxd new file mode 100644 index 00000000000..e466e6ee5a0 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/coo.pxd @@ -0,0 +1,71 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
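A quick smoke test for the two sampling entry points newly exported from the package root in `__init__.py` above (a sketch; assumes pylibcugraph is installed with this patch applied):

    # Smoke test: the package-level exports added in __init__.py above.
    # Assumes a post-patch pylibcugraph install.
    import pylibcugraph

    assert hasattr(pylibcugraph, "biased_neighbor_sample")
    assert hasattr(pylibcugraph, "negative_sampling")
    print("new sampling APIs are importable")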
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) + +cdef extern from "cugraph_c/coo.h": + ctypedef struct cugraph_coo_t: + pass + + ctypedef struct cugraph_coo_list_t: + pass + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_coo_get_sources( + cugraph_coo_t* coo + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_coo_get_destinations( + cugraph_coo_t* coo + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_coo_get_edge_weights( + cugraph_coo_t* coo + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_coo_get_edge_id( + cugraph_coo_t* coo + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_coo_get_edge_type( + cugraph_coo_t* coo + ) + + cdef size_t \ + cugraph_coo_list_size( + const cugraph_coo_list_t* coo_list + ) + + cdef cugraph_coo_t* \ + cugraph_coo_list_element( + cugraph_coo_list_t* coo_list, + size_t index) + + cdef void \ + cugraph_coo_free( + cugraph_coo_t* coo + ) + + cdef void \ + cugraph_coo_list_free( + cugraph_coo_list_t* coo_list + ) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd index f6d62377443..cda47e55f77 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -31,62 +31,16 @@ from pylibcugraph._cugraph_c.random cimport ( cugraph_rng_state_t, ) +from pylibcugraph._cugraph_c.coo cimport ( + cugraph_coo_t, + cugraph_coo_list_t, +) + cdef extern from "cugraph_c/graph_generators.h": ctypedef enum cugraph_generator_distribution_t: POWER_LAW UNIFORM - ctypedef struct cugraph_coo_t: - pass - - ctypedef struct cugraph_coo_list_t: - pass - - cdef cugraph_type_erased_device_array_view_t* \ - cugraph_coo_get_sources( - cugraph_coo_t* coo - ) - - cdef cugraph_type_erased_device_array_view_t* \ - cugraph_coo_get_destinations( - cugraph_coo_t* coo - ) - - cdef cugraph_type_erased_device_array_view_t* \ - cugraph_coo_get_edge_weights( - cugraph_coo_t* coo - ) - - cdef cugraph_type_erased_device_array_view_t* \ - cugraph_coo_get_edge_id( - cugraph_coo_t* coo - ) - - cdef cugraph_type_erased_device_array_view_t* \ - cugraph_coo_get_edge_type( - cugraph_coo_t* coo - ) - - cdef size_t \ - cugraph_coo_list_size( - const cugraph_coo_list_t* coo_list - ) - - cdef cugraph_coo_t* \ - cugraph_coo_list_element( - cugraph_coo_list_t* coo_list, - size_t index) - - cdef void \ - cugraph_coo_free( - cugraph_coo_t* coo - ) - - cdef void \ - cugraph_coo_list_free( - cugraph_coo_list_t* coo_list - ) - cdef cugraph_error_code_t \ cugraph_generate_rmat_edgelist( const cugraph_resource_handle_t* handle, diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index 0f852d9cecd..c982b12665a 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -39,7 +39,9 @@ from pylibcugraph._cugraph_c.random cimport ( from pylibcugraph._cugraph_c.array cimport ( 
cugraph_type_erased_device_array_t, ) - +from pylibcugraph._cugraph_c.coo cimport ( + cugraph_coo_t, +) from pylibcugraph._cugraph_c.properties cimport ( cugraph_edge_property_view_t, ) @@ -103,3 +105,21 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_type_erased_device_array_t** vertices, cugraph_error_t** error ) + + # negative sampling + cdef cugraph_error_code_t \ + cugraph_negative_sampling( + const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + const cugraph_type_erased_device_array_view_t* src_bias, + const cugraph_type_erased_device_array_view_t* dst_bias, + size_t num_samples, + bool_t remove_duplicates, + bool_t remove_false_negatives, + bool_t exact_number_of_samples, + bool_t do_expensive_check, + cugraph_coo_t **result, + cugraph_error_t **error + ) diff --git a/python/pylibcugraph/pylibcugraph/biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/biased_neighbor_sample.pyx new file mode 100644 index 00000000000..2dd138d5d06 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/biased_neighbor_sample.pyx @@ -0,0 +1,448 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + +from libc.stdint cimport uintptr_t + +from pylibcugraph._cugraph_c.resource_handle cimport ( + bool_t, + cugraph_resource_handle_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, + cugraph_type_erased_device_array_view_create, + cugraph_type_erased_device_array_view_free, + cugraph_type_erased_host_array_view_t, + cugraph_type_erased_host_array_view_create, + cugraph_type_erased_host_array_view_free, +) +from pylibcugraph._cugraph_c.graph cimport ( + cugraph_graph_t, +) +from pylibcugraph._cugraph_c.algorithms cimport ( + cugraph_sample_result_t, + cugraph_prior_sources_behavior_t, + cugraph_compression_type_t, + cugraph_sampling_options_t, + cugraph_sampling_options_create, + cugraph_sampling_options_free, + cugraph_sampling_set_with_replacement, + cugraph_sampling_set_return_hops, + cugraph_sampling_set_prior_sources_behavior, + cugraph_sampling_set_dedupe_sources, + cugraph_sampling_set_renumber_results, + cugraph_sampling_set_compress_per_hop, + cugraph_sampling_set_compression_type, + cugraph_sampling_set_retain_seeds, +) +from pylibcugraph._cugraph_c.sampling_algorithms cimport ( + cugraph_biased_neighbor_sample, + +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) +from pylibcugraph.utils cimport ( + assert_success, + assert_CAI_type, + assert_AI_type, + get_c_type_from_numpy_type, +) +from pylibcugraph.internal_types.sampling_result cimport ( + SamplingResult, +) +from pylibcugraph._cugraph_c.random cimport ( + cugraph_rng_state_t +) +from pylibcugraph.random cimport ( 
+ CuGraphRandomState +) +import warnings + +# TODO accept cupy/numpy random state in addition to raw seed. +def biased_neighbor_sample(ResourceHandle resource_handle, + _GPUGraph input_graph, + start_list, + h_fan_out, + *, + bool_t with_replacement, + bool_t do_expensive_check, + with_edge_properties=True, + batch_id_list=None, + label_list=None, + label_to_output_comm_rank=None, + label_offsets=None, + biases=None, + prior_sources_behavior=None, + deduplicate_sources=False, + return_hops=False, + renumber=False, + retain_seeds=False, + compression='COO', + compress_per_hop=False, + random_state=None, + return_dict=False,): + """ + Does neighborhood sampling, which samples nodes from a graph based on the + current node's neighbors, with a corresponding fanout value at each hop. + + Parameters + ---------- + resource_handle: ResourceHandle + Handle to the underlying device and host resources needed for + referencing data and running algorithms. + + input_graph : SGGraph or MGGraph + The input graph, for either Single or Multi-GPU operations. + + start_list: device array type + Device array containing the list of starting vertices for sampling. + + h_fan_out: numpy array type + Host array containing the branching out (fan-out) degrees per + starting vertex for each hop level. + + with_replacement: bool + If true, sampling procedure is done with replacement (the same vertex + can be selected multiple times in the same step). + + do_expensive_check: bool + If True, performs more extensive tests on the inputs to ensure + validity, at the expense of increased run time. + + with_edge_properties: bool + This argument is present for compatibility with + uniform_neighbor_sample. Only the 'True' option is accepted. + All edge properties in the graph are returned. + + batch_id_list: list[int32] (Optional) + List of int32 batch ids that are returned with each edge. Optional + argument, defaults to NULL, returning nothing. + + label_list: list[int32] (Optional) + List of unique int32 batch ids. Required if also passing the + label_to_output_comm_rank flag. Defaults to NULL (does nothing) + + label_to_output_comm_rank: list[int32] (Optional) + Maps the unique batch ids in label_list to the rank of the + worker that should hold results for that batch id. + Defaults to NULL (does nothing) + + label_offsets: list[int] (Optional) + Offsets of each label within the start vertex list. + + biases: list[float32/64] (Optional) + Edge biases. If not provided, uses the weight property. + Currently unsupported. + + prior_sources_behavior: str (Optional) + Options are "carryover" and "exclude". + Default will leave the source list as-is. + Carryover will carry over sources from previous hops to the + current hop. + Exclude will exclude sources from previous hops from reappearing + as sources in future hops. + + deduplicate_sources: bool (Optional) + If True, will deduplicate the source list before sampling. + Defaults to False. + + renumber: bool (Optional) + If True, will renumber the sources and destinations on a + per-batch basis and return the renumber map and batch offsets + in addition to the standard returns. + + retain_seeds: bool (Optional) + If True, will retain the original seeds (original source vertices) + in the output even if they do not have outgoing neighbors. + Defaults to False. + + compression: str (Optional) + Options: COO (default), CSR, CSC, DCSR, DCSC + Sets the compression format for the returned samples.
+ + compress_per_hop: bool (Optional) + If False (default), will create a compressed edgelist for the + entire batch. + If True, will create a separate compressed edgelist per hop within + a batch. + + random_state: int (Optional) + Random state to use when generating samples. Optional argument, + defaults to a hash of process id, time, and hostname. + (See pylibcugraph.random.CuGraphRandomState) + + return_dict: bool (Optional) + Whether to return a dictionary instead of a tuple. + Optional argument, defaults to False, returning a tuple. + This argument will eventually be deprecated in favor + of always returning a dictionary. + + Returns + ------- + A tuple of device arrays containing, in order, the major (source) and + minor (destination) vertices of the sampled edges, followed by the edge + weights, edge ids, edge types, batch ids, and label-hop offsets. + + If renumber was set to False, the tuple ends with the hop ids. If renumber + was set to True, the hop id entry is None and the tuple additionally + contains a device array with the renumber map and a device array with the + renumber map offsets (which delineate where the renumber map for each + batch starts). If return_dict is True, the same arrays are returned in a + dictionary keyed by name. + + """ + if biases is not None: + raise ValueError("The biases parameter is currently unsupported") + + if not with_edge_properties: + raise ValueError("with_edge_properties=False is not supported by biased_neighbor_sample") + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = ( + resource_handle.c_resource_handle_ptr + ) + + cdef cugraph_graph_t* c_graph_ptr = input_graph.c_graph_ptr + + cdef bool_t c_deduplicate_sources = deduplicate_sources + cdef bool_t c_return_hops = return_hops + cdef bool_t c_renumber = renumber + cdef bool_t c_compress_per_hop = compress_per_hop + + assert_CAI_type(start_list, "start_list") + assert_CAI_type(batch_id_list, "batch_id_list", True) + assert_CAI_type(label_list, "label_list", True) + assert_CAI_type(label_to_output_comm_rank, "label_to_output_comm_rank", True) + assert_CAI_type(label_offsets, "label_offsets", True) + assert_AI_type(h_fan_out, "h_fan_out") + + cdef cugraph_sample_result_t* result_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + cdef uintptr_t cai_start_ptr = \ + start_list.__cuda_array_interface__["data"][0] + + cdef uintptr_t cai_batch_id_ptr + if batch_id_list is not None: + cai_batch_id_ptr = \ + batch_id_list.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_label_list_ptr + if label_list is not None: + cai_label_list_ptr = \ + label_list.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_label_to_output_comm_rank_ptr + if label_to_output_comm_rank is not None: + cai_label_to_output_comm_rank_ptr = \ + label_to_output_comm_rank.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_label_offsets_ptr + if label_offsets is not None: + cai_label_offsets_ptr = \ + label_offsets.__cuda_array_interface__['data'][0] + + cdef uintptr_t ai_fan_out_ptr = \ + h_fan_out.__array_interface__["data"][0] + + cdef cugraph_type_erased_device_array_view_t* start_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_start_ptr, + len(start_list), + get_c_type_from_numpy_type(start_list.dtype)) + + cdef cugraph_type_erased_device_array_view_t* batch_id_ptr = NULL + if batch_id_list is not None: + batch_id_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_batch_id_ptr, + len(batch_id_list),
get_c_type_from_numpy_type(batch_id_list.dtype) + ) + + cdef cugraph_type_erased_device_array_view_t* label_list_ptr = NULL + if label_list is not None: + label_list_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_list_ptr, + len(label_list), + get_c_type_from_numpy_type(label_list.dtype) + ) + + cdef cugraph_type_erased_device_array_view_t* label_to_output_comm_rank_ptr = NULL + if label_to_output_comm_rank is not None: + label_to_output_comm_rank_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_to_output_comm_rank_ptr, + len(label_to_output_comm_rank), + get_c_type_from_numpy_type(label_to_output_comm_rank.dtype) + ) + + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL + if retain_seeds: + if label_offsets is None: + raise ValueError("Must provide label offsets if retain_seeds is True") + label_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_offsets_ptr, + len(label_offsets), + get_c_type_from_numpy_type(label_offsets.dtype) + ) + + cdef cugraph_type_erased_host_array_view_t* fan_out_ptr = \ + cugraph_type_erased_host_array_view_create( + ai_fan_out_ptr, + len(h_fan_out), + get_c_type_from_numpy_type(h_fan_out.dtype)) + + + cg_rng_state = CuGraphRandomState(resource_handle, random_state) + + cdef cugraph_rng_state_t* rng_state_ptr = \ + cg_rng_state.rng_state_ptr + + cdef cugraph_prior_sources_behavior_t prior_sources_behavior_e + if prior_sources_behavior is None: + prior_sources_behavior_e = cugraph_prior_sources_behavior_t.DEFAULT + elif prior_sources_behavior == 'carryover': + prior_sources_behavior_e = cugraph_prior_sources_behavior_t.CARRY_OVER + elif prior_sources_behavior == 'exclude': + prior_sources_behavior_e = cugraph_prior_sources_behavior_t.EXCLUDE + else: + raise ValueError( + f'Invalid option {prior_sources_behavior}' + ' for prior sources behavior' + ) + + cdef cugraph_compression_type_t compression_behavior_e + if compression is None or compression == 'COO': + compression_behavior_e = cugraph_compression_type_t.COO + elif compression == 'CSR': + compression_behavior_e = cugraph_compression_type_t.CSR + elif compression == 'CSC': + compression_behavior_e = cugraph_compression_type_t.CSC + elif compression == 'DCSR': + compression_behavior_e = cugraph_compression_type_t.DCSR + elif compression == 'DCSC': + compression_behavior_e = cugraph_compression_type_t.DCSC + else: + raise ValueError( + f'Invalid option {compression}' + ' for compression type' + ) + + cdef cugraph_sampling_options_t* sampling_options + error_code = cugraph_sampling_options_create(&sampling_options, &error_ptr) + assert_success(error_code, error_ptr, "cugraph_sampling_options_create") + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement) + cugraph_sampling_set_return_hops(sampling_options, c_return_hops) + cugraph_sampling_set_dedupe_sources(sampling_options, c_deduplicate_sources) + cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior_e) + cugraph_sampling_set_renumber_results(sampling_options, c_renumber) + cugraph_sampling_set_compression_type(sampling_options, compression_behavior_e) + cugraph_sampling_set_compress_per_hop(sampling_options, c_compress_per_hop) + cugraph_sampling_set_retain_seeds(sampling_options, retain_seeds) + + error_code = cugraph_biased_neighbor_sample( + c_resource_handle_ptr, + c_graph_ptr, + NULL, + start_ptr, + batch_id_ptr, + label_list_ptr, + label_to_output_comm_rank_ptr, + label_offsets_ptr, + fan_out_ptr, + rng_state_ptr, + 
sampling_options, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_biased_neighbor_sample") + + # Free the sampling options + cugraph_sampling_options_free(sampling_options) + + # Free the two input arrays that are no longer needed. + cugraph_type_erased_device_array_view_free(start_ptr) + cugraph_type_erased_host_array_view_free(fan_out_ptr) + if batch_id_list is not None: + cugraph_type_erased_device_array_view_free(batch_id_ptr) + if label_offsets is not None: + cugraph_type_erased_device_array_view_free(label_offsets_ptr) + + # Have the SamplingResult instance assume ownership of the result data. + result = SamplingResult() + result.set_ptr(result_ptr) + + # Get cupy "views" of the individual arrays to return. These each increment + # the refcount on the SamplingResult instance which will keep the data alive + # until all references are removed and the GC runs. + cupy_majors = result.get_majors() + cupy_major_offsets = result.get_major_offsets() + cupy_minors = result.get_minors() + cupy_edge_weights = result.get_edge_weights() + cupy_edge_ids = result.get_edge_ids() + cupy_edge_types = result.get_edge_types() + cupy_batch_ids = result.get_batch_ids() + cupy_label_hop_offsets = result.get_label_hop_offsets() + + if renumber: + cupy_renumber_map = result.get_renumber_map() + cupy_renumber_map_offsets = result.get_renumber_map_offsets() + + if return_dict: + return { + 'major_offsets': cupy_major_offsets, + 'majors': cupy_majors, + 'minors': cupy_minors, + 'weight': cupy_edge_weights, + 'edge_id': cupy_edge_ids, + 'edge_type': cupy_edge_types, + 'batch_id': cupy_batch_ids, + 'label_hop_offsets': cupy_label_hop_offsets, + 'hop_id': None, + 'renumber_map': cupy_renumber_map, + 'renumber_map_offsets': cupy_renumber_map_offsets + } + else: + cupy_majors = cupy_major_offsets if cupy_majors is None else cupy_majors + return (cupy_majors, cupy_minors, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_label_hop_offsets, None, cupy_renumber_map, cupy_renumber_map_offsets) + else: + cupy_hop_ids = result.get_hop_ids() + if return_dict: + return { + 'major_offsets': cupy_major_offsets, + 'majors': cupy_majors, + 'minors': cupy_minors, + 'weight': cupy_edge_weights, + 'edge_id': cupy_edge_ids, + 'edge_type': cupy_edge_types, + 'batch_id': cupy_batch_ids, + 'label_hop_offsets': cupy_label_hop_offsets, + 'hop_id': cupy_hop_ids, + } + else: + cupy_majors = cupy_major_offsets if cupy_majors is None else cupy_majors + return (cupy_majors, cupy_minors, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_label_hop_offsets, cupy_hop_ids) diff --git a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx index f38ad21d3b0..4ea96920e61 100644 --- a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx +++ b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx @@ -26,11 +26,7 @@ from pylibcugraph._cugraph_c.error cimport ( from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, ) -from pylibcugraph._cugraph_c.graph_generators cimport ( - cugraph_generate_rmat_edgelist, - cugraph_generate_edge_weights, - cugraph_generate_edge_ids, - cugraph_generate_edge_types, +from pylibcugraph._cugraph_c.coo cimport ( cugraph_coo_t, cugraph_coo_get_sources, cugraph_coo_get_destinations, @@ -39,6 +35,12 @@ from pylibcugraph._cugraph_c.graph_generators cimport ( cugraph_coo_get_edge_type, cugraph_coo_free, ) +from 
pylibcugraph._cugraph_c.graph_generators cimport ( + cugraph_generate_rmat_edgelist, + cugraph_generate_edge_weights, + cugraph_generate_edge_ids, + cugraph_generate_edge_types, +) from pylibcugraph.resource_handle cimport ( ResourceHandle, ) diff --git a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx index 32af0c13fc0..7de48708f80 100644 --- a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx +++ b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx @@ -26,14 +26,9 @@ from pylibcugraph._cugraph_c.error cimport ( from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, ) -from pylibcugraph._cugraph_c.graph_generators cimport ( - cugraph_generate_rmat_edgelists, - cugraph_generate_edge_weights, - cugraph_generate_edge_ids, - cugraph_generate_edge_types, +from pylibcugraph._cugraph_c.coo cimport ( cugraph_coo_t, cugraph_coo_list_t, - cugraph_generator_distribution_t, cugraph_coo_get_sources, cugraph_coo_get_destinations, cugraph_coo_get_edge_weights, @@ -44,6 +39,13 @@ from pylibcugraph._cugraph_c.graph_generators cimport ( cugraph_coo_free, cugraph_coo_list_free, ) +from pylibcugraph._cugraph_c.graph_generators cimport ( + cugraph_generate_rmat_edgelists, + cugraph_generate_edge_weights, + cugraph_generate_edge_ids, + cugraph_generate_edge_types, + cugraph_generator_distribution_t, +) from pylibcugraph.resource_handle cimport ( ResourceHandle, ) diff --git a/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt index 1ca169c5869..22f07939db0 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/internal_types/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -14,6 +14,7 @@ set(cython_sources sampling_result.pyx + coo.pyx ) set(linked_libraries cugraph::cugraph;cugraph::cugraph_c) diff --git a/python/pylibcugraph/pylibcugraph/internal_types/coo.pxd b/python/pylibcugraph/pylibcugraph/internal_types/coo.pxd new file mode 100644 index 00000000000..129b0be4dbe --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/internal_types/coo.pxd @@ -0,0 +1,28 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
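The option handling in `biased_neighbor_sample` above maps user-facing strings onto C enum values and rejects anything else. A plain-Python rendering of that validation (dictionary and function names here are illustrative, not pylibcugraph API):

    # Plain-Python rendering of the option validation performed in
    # biased_neighbor_sample; names are illustrative, not pylibcugraph API.
    PRIOR_SOURCES_BEHAVIORS = {
        None: "DEFAULT",
        "carryover": "CARRY_OVER",
        "exclude": "EXCLUDE",
    }
    COMPRESSION_TYPES = {
        None: "COO", "COO": "COO", "CSR": "CSR",
        "CSC": "CSC", "DCSR": "DCSR", "DCSC": "DCSC",
    }

    def validate_sampling_options(prior_sources_behavior, compression):
        if prior_sources_behavior not in PRIOR_SOURCES_BEHAVIORS:
            raise ValueError(
                f"Invalid option {prior_sources_behavior}"
                " for prior sources behavior"
            )
        if compression not in COMPRESSION_TYPES:
            raise ValueError(f"Invalid option {compression} for compression type")
        return (
            PRIOR_SOURCES_BEHAVIORS[prior_sources_behavior],
            COMPRESSION_TYPES[compression],
        )

    print(validate_sampling_options("carryover", "CSR"))  # ('CARRY_OVER', 'CSR')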
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + + +from pylibcugraph._cugraph_c.coo cimport ( + cugraph_coo_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) + +cdef class COO: + cdef cugraph_coo_t* c_coo_ptr + cdef set_ptr(self, cugraph_coo_t* ptr) + cdef get_array(self, cugraph_type_erased_device_array_view_t* ptr) diff --git a/python/pylibcugraph/pylibcugraph/internal_types/coo.pyx b/python/pylibcugraph/pylibcugraph/internal_types/coo.pyx new file mode 100644 index 00000000000..64d10c22eaf --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/internal_types/coo.pyx @@ -0,0 +1,96 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + +from pylibcugraph._cugraph_c.coo cimport ( + cugraph_coo_t, + cugraph_coo_free, + cugraph_coo_get_sources, + cugraph_coo_get_destinations, + cugraph_coo_get_edge_weights, + cugraph_coo_get_edge_id, + cugraph_coo_get_edge_type, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) +from pylibcugraph.utils cimport create_cupy_array_view_for_device_ptr + +cdef class COO: + """ + Cython interface to a cugraph_coo_t pointer. Instances of this + call will take ownership of the pointer and free it under standard python + GC rules (ie. when all references to it are no longer present). + + This class provides methods to return non-owning cupy ndarrays for the + corresponding array members. Returning these cupy arrays increments the ref + count on the COO instances from which the cupy arrays are + referencing. + """ + def __cinit__(self): + # This COO instance owns sample_result_ptr now. 
It will be + # freed when this instance is deleted (see __dealloc__()) + self.c_coo_ptr = NULL + + def __dealloc__(self): + if self.c_coo_ptr is not NULL: + cugraph_coo_free(self.c_coo_ptr) + + cdef set_ptr(self, cugraph_coo_t* ptr): + self.c_coo_ptr = ptr + + cdef get_array(self, cugraph_type_erased_device_array_view_t* ptr): + if ptr is NULL: + return None + + return create_cupy_array_view_for_device_ptr( + ptr, + self, + ) + + def get_sources(self): + if self.c_coo_ptr is NULL: + raise ValueError("pointer not set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_coo_get_sources(self.c_coo_ptr) + return self.get_array(ptr) + + def get_destinations(self): + if self.c_coo_ptr is NULL: + raise ValueError("pointer not set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_coo_get_destinations(self.c_coo_ptr) + return self.get_array(ptr) + + def get_edge_ids(self): + if self.c_coo_ptr is NULL: + raise ValueError("pointer not set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_coo_get_edge_id(self.c_coo_ptr) + return self.get_array(ptr) + + def get_edge_types(self): + if self.c_coo_ptr is NULL: + raise ValueError("pointer not set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_coo_get_edge_type(self.c_coo_ptr) + return self.get_array(ptr) + + def get_edge_weights(self): + if self.c_coo_ptr is NULL: + raise ValueError("pointer not set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* ptr = cugraph_coo_get_edge_weights(self.c_coo_ptr) + return self.get_array(ptr) diff --git a/python/pylibcugraph/pylibcugraph/negative_sampling.pyx b/python/pylibcugraph/pylibcugraph/negative_sampling.pyx new file mode 100644 index 00000000000..610cfa90ccf --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/negative_sampling.pyx @@ -0,0 +1,184 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
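For context on the keep-alive contract described in COO's docstring: the library routes this through cupy's UnownedMemory owner mechanism. The helper below is a minimal, hypothetical sketch of what pylibcugraph.utils.create_cupy_array_view_for_device_ptr does conceptually, not the library's actual implementation; view_from_device_ptr and the dtype/shape choices are illustrative assumptions.

    # Hypothetical sketch of the keep-alive pattern behind COO.get_array():
    # wrap a raw device pointer in cupy UnownedMemory, naming the owning
    # object as `owner` so the GC cannot collect the COO (and free the
    # C-owned buffer) while any view is still alive.
    import cupy as cp

    def view_from_device_ptr(dev_ptr, n_elems, dtype, owner):
        # UnownedMemory holds a reference to `owner`; the returned ndarray
        # holds the UnownedMemory, so `owner` outlives every view.
        nbytes = n_elems * cp.dtype(dtype).itemsize
        mem = cp.cuda.UnownedMemory(dev_ptr, nbytes, owner)
        return cp.ndarray((n_elems,), dtype=dtype,
                          memptr=cp.cuda.MemoryPointer(mem, 0))

    # Demo with a cupy array standing in for cugraph_coo_t-owned memory:
    backing = cp.arange(6, dtype=cp.int32)
    view = view_from_device_ptr(backing.data.ptr, 6, cp.int32, owner=backing)
    assert int(view.sum()) == 15
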
diff --git a/python/pylibcugraph/pylibcugraph/negative_sampling.pyx b/python/pylibcugraph/pylibcugraph/negative_sampling.pyx
new file mode 100644
index 00000000000..610cfa90ccf
--- /dev/null
+++ b/python/pylibcugraph/pylibcugraph/negative_sampling.pyx
@@ -0,0 +1,184 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Have cython use python 3 syntax
+# cython: language_level = 3
+
+from libc.stdint cimport uintptr_t
+
+from pylibcugraph._cugraph_c.resource_handle cimport (
+    cugraph_resource_handle_t,
+    bool_t,
+)
+from pylibcugraph._cugraph_c.error cimport (
+    cugraph_error_code_t,
+    cugraph_error_t,
+)
+from pylibcugraph._cugraph_c.array cimport (
+    cugraph_type_erased_device_array_view_t,
+    cugraph_type_erased_device_array_view_create,
+    cugraph_type_erased_device_array_view_free,
+    cugraph_type_erased_host_array_view_t,
+    cugraph_type_erased_host_array_view_create,
+    cugraph_type_erased_host_array_view_free,
+)
+from pylibcugraph.resource_handle cimport (
+    ResourceHandle,
+)
+from pylibcugraph.graphs cimport (
+    _GPUGraph,
+)
+from pylibcugraph._cugraph_c.graph cimport (
+    cugraph_graph_t,
+)
+from pylibcugraph._cugraph_c.sampling_algorithms cimport (
+    cugraph_negative_sampling,
+)
+from pylibcugraph._cugraph_c.coo cimport (
+    cugraph_coo_t,
+)
+from pylibcugraph.internal_types.coo cimport (
+    COO,
+)
+from pylibcugraph.utils cimport (
+    assert_success,
+    assert_CAI_type,
+    create_cugraph_type_erased_device_array_view_from_py_obj,
+)
+from pylibcugraph._cugraph_c.random cimport (
+    cugraph_rng_state_t
+)
+from pylibcugraph.random cimport (
+    CuGraphRandomState
+)
+
+def negative_sampling(ResourceHandle resource_handle,
+                      _GPUGraph graph,
+                      size_t num_samples,
+                      random_state=None,
+                      vertices=None,
+                      src_bias=None,
+                      dst_bias=None,
+                      remove_duplicates=False,
+                      remove_false_negatives=False,
+                      exact_number_of_samples=False,
+                      do_expensive_check=False):
+    """
+    Performs negative sampling, generating an edgelist of random edges for
+    use as negative samples; this is essentially a form of graph generation.
+
+    By setting vertices, src_bias, and dst_bias, this function can perform
+    biased negative sampling.
+
+    Parameters
+    ----------
+    resource_handle: ResourceHandle
+        Handle to the underlying device and host resources needed for
+        referencing data and running algorithms.
+    graph: SGGraph or MGGraph
+        The stored cuGraph graph to create negative samples for.
+    num_samples: int
+        The number of negative edges to generate.
+    random_state: int (Optional)
+        Random state to use when generating samples. Defaults to a hash of
+        process id, time, and hostname.
+        (See pylibcugraph.random.CuGraphRandomState)
+    vertices: device array type (Optional)
+        Vertex ids corresponding to the src/dst biases, if provided.
+        Ignored if src/dst biases are not provided.
+    src_bias: device array type (Optional)
+        Probability per vertex that the vertex is selected as a source
+        vertex. Does not have to be normalized. Uses a uniform distribution
+        if not provided.
+    dst_bias: device array type (Optional)
+        Probability per vertex that the vertex is selected as a destination
+        vertex. Does not have to be normalized. Uses a uniform distribution
+        if not provided.
+    remove_duplicates: bool (Optional)
+        Whether to remove duplicate edges from the generated edgelist.
+        Defaults to False (does not remove duplicates).
+    remove_false_negatives: bool (Optional)
+        Whether to remove false negatives (generated edges that actually
+        exist in the graph) from the generated edgelist.
+        Defaults to False (does not check for and remove false negatives).
+    exact_number_of_samples: bool (Optional)
+        Whether to keep regenerating samples until the desired number, as
+        specified by num_samples, has been generated.
+        Defaults to False (does not regenerate if enough samples are not
+        produced in the initial round).
+    do_expensive_check: bool (Optional)
+        Whether to perform an expensive error check at the C++ level.
+        Defaults to False (no error check).
+
+    Returns
+    -------
+    dict[str, cupy.ndarray]
+        Generated edges in COO format.
+    """
+
+    assert_CAI_type(vertices, "vertices", True)
+    assert_CAI_type(src_bias, "src_bias", True)
+    assert_CAI_type(dst_bias, "dst_bias", True)
+
+    cdef cugraph_resource_handle_t* c_resource_handle_ptr = (
+        resource_handle.c_resource_handle_ptr
+    )
+
+    cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr
+
+    cdef bool_t c_remove_duplicates = remove_duplicates
+    cdef bool_t c_remove_false_negatives = remove_false_negatives
+    cdef bool_t c_exact_number_of_samples = exact_number_of_samples
+    cdef bool_t c_do_expensive_check = do_expensive_check
+
+    cg_rng_state = CuGraphRandomState(resource_handle, random_state)
+
+    cdef cugraph_rng_state_t* rng_state_ptr = \
+        cg_rng_state.rng_state_ptr
+
+    cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(vertices)
+    cdef cugraph_type_erased_device_array_view_t* src_bias_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(src_bias)
+    cdef cugraph_type_erased_device_array_view_t* dst_bias_ptr = \
+        create_cugraph_type_erased_device_array_view_from_py_obj(dst_bias)
+
+    cdef cugraph_coo_t* result_ptr
+    cdef cugraph_error_t* err_ptr
+    cdef cugraph_error_code_t error_code
+
+    error_code = cugraph_negative_sampling(
+        c_resource_handle_ptr,
+        rng_state_ptr,
+        c_graph_ptr,
+        vertices_ptr,
+        src_bias_ptr,
+        dst_bias_ptr,
+        num_samples,
+        c_remove_duplicates,
+        c_remove_false_negatives,
+        c_exact_number_of_samples,
+        c_do_expensive_check,
+        &result_ptr,
+        &err_ptr,
+    )
+    assert_success(error_code, err_ptr, "cugraph_negative_sampling")
+
+    coo = COO()
+    coo.set_ptr(result_ptr)
+
+    return {
+        'sources': coo.get_sources(),
+        'destinations': coo.get_destinations(),
+        'edge_id': coo.get_edge_ids(),
+        'edge_type': coo.get_edge_types(),
+        'weight': coo.get_edge_weights(),
+    }
diff --git a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx
index f002622f497..f3e2336d8f6 100644
--- a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx
+++ b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx
@@ -117,7 +117,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle,
         Device array containing the list of starting vertices for sampling.
 
     h_fan_out: numpy array type
-        Device array containing the brancing out (fan-out) degrees per
+        Host array containing the branching out (fan-out) degrees per
         starting vertex for each hop level.
 
     with_replacement: bool
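For orientation, here is a minimal sketch of how the new negative_sampling function is intended to be called on a tiny single-GPU graph. It assumes the function is re-exported from the pylibcugraph package root (as the other sampling routines are), follows the existing SGGraph construction pattern, and uses toy dtypes/values that are illustrative only.

    # Hedged usage sketch for the new negative_sampling API.
    import cupy as cp
    from pylibcugraph import (ResourceHandle, GraphProperties, SGGraph,
                              negative_sampling)

    handle = ResourceHandle()
    graph = SGGraph(
        handle,
        GraphProperties(is_symmetric=False, is_multigraph=False),
        cp.asarray([0, 1, 2, 3], dtype=cp.int32),   # sources
        cp.asarray([1, 2, 3, 0], dtype=cp.int32),   # destinations
        weight_array=cp.ones(4, dtype=cp.float32),
        store_transposed=False,
        renumber=False,
        do_expensive_check=False,
    )

    # Request 8 candidate negative edges; strip duplicates and any
    # accidental hits on real edges.
    result = negative_sampling(handle, graph, num_samples=8,
                               remove_duplicates=True,
                               remove_false_negatives=True)
    print(result["sources"], result["destinations"])
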
diff --git a/python/pylibcugraph/pyproject.toml b/python/pylibcugraph/pyproject.toml
index 4dd513a4902..c12280473b5 100644
--- a/python/pylibcugraph/pyproject.toml
+++ b/python/pylibcugraph/pyproject.toml
@@ -5,7 +5,7 @@
 requires = [
     "cython>=3.0.0",
     "rapids-build-backend>=0.3.1,<0.4.0.dev0",
-    "scikit-build-core[pyproject]>=0.7.0",
+    "scikit-build-core[pyproject]>=0.10.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 build-backend = "rapids_build_backend.build"
@@ -21,23 +21,27 @@ authors = [
     { name = "NVIDIA Corporation" },
 ]
 license = { text = "Apache 2.0" }
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 dependencies = [
-    "pylibraft==24.10.*,>=0.0.0a0",
-    "rmm==24.10.*,>=0.0.0a0",
+    "nvidia-cublas",
+    "nvidia-curand",
+    "nvidia-cusolver",
+    "nvidia-cusparse",
+    "pylibraft==24.12.*,>=0.0.0a0",
+    "rmm==24.12.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
     "Intended Audience :: Developers",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 
 [project.optional-dependencies]
 test = [
-    "cudf==24.10.*,>=0.0.0a0",
-    "numpy>=1.23,<2.0a0",
+    "cudf==24.12.*,>=0.0.0a0",
+    "numpy>=1.23,<3.0a0",
     "pandas",
     "pytest",
     "pytest-benchmark",
@@ -53,7 +57,8 @@ Documentation = "https://docs.rapids.ai/api/cugraph/stable/"
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
 cmake.build-type = "Release"
-cmake.minimum-version = "3.26.4"
+cmake.version = "CMakeLists.txt"
+minimum-version = "build-system.requires"
 ninja.make-fallback = true
 sdist.reproducible = true
 wheel.packages = ["pylibcugraph"]
@@ -69,7 +74,7 @@ dependencies-file = "../../dependencies.yaml"
 requires = [
     "cmake>=3.26.4,!=3.30.0",
     "ninja",
-    "pylibraft==24.10.*,>=0.0.0a0",
-    "rmm==24.10.*,>=0.0.0a0",
+    "pylibraft==24.12.*,>=0.0.0a0",
+    "rmm==24.12.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
-matrix-entry = "cuda_suffixed=true"
+matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true"
diff --git a/python/pylibcugraph/pytest.ini b/python/pylibcugraph/pytest.ini
index 573628de680..d5ade9f4836 100644
--- a/python/pylibcugraph/pytest.ini
+++ b/python/pylibcugraph/pytest.ini
@@ -14,3 +14,5 @@
 [pytest]
 markers =
     cugraph_ops: Tests requiring cugraph-ops
+
+addopts = --tb=native
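Since the test extras above pull in cudf, one last illustrative note on consuming the new return type: the dict of cupy device arrays returned by negative_sampling maps directly onto a cuDF edgelist. This is a standalone sketch with stand-in data, not part of this diff.

    # Standalone sketch: materializing data shaped like
    # negative_sampling()'s return value as a cuDF DataFrame.
    import cupy as cp
    import cudf

    result = {  # stand-in for negative_sampling(...) output
        "sources": cp.asarray([0, 2, 3], dtype=cp.int32),
        "destinations": cp.asarray([2, 0, 1], dtype=cp.int32),
    }
    neg_edges = cudf.DataFrame({"src": result["sources"],
                                "dst": result["destinations"]})
    print(neg_edges)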