Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose new all-pairs Similarity algorithms #4502

Merged
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ea0e66d
add plc implementation of all-pairs similarity leveraging the capi
jnke2016 Jun 24, 2024
9250562
import and use SIZE_MAX
jnke2016 Jun 26, 2024
f0c9edc
import vertex pairs retrieval function from the result_ptr and updat…
jnke2016 Jun 26, 2024
3dfe9fe
add sg implementation of all pairs similarity algorithms
jnke2016 Jun 26, 2024
4a89937
add mg implementation of all pairs jaccard similarity
jnke2016 Jun 26, 2024
5249f02
add mg tests for all pairs jaccard
jnke2016 Jul 3, 2024
d111e98
unrenumber the vertex_pairs
jnke2016 Jul 4, 2024
228f3fb
fix typo
jnke2016 Jul 5, 2024
2b6c002
add tests for the MG CAPI all pairs similarity
jnke2016 Jul 7, 2024
42cb715
add tests for all pairs similarity
jnke2016 Jul 8, 2024
7b893e3
add mg tests for all pairs similarity
jnke2016 Jul 8, 2024
3fcee12
add CAPI for cosine similarity
jnke2016 Jul 8, 2024
18ba600
add plc API for cosine similarity
jnke2016 Jul 8, 2024
2b32679
add all pairs sorensen, overlap and cosine similarity to the python API
jnke2016 Jul 8, 2024
dfb1aad
add all pairs cosine similarity
jnke2016 Jul 8, 2024
9b0253f
add MG tests for cosine similarity
jnke2016 Jul 8, 2024
8facad3
properly instantiate similarity functions
jnke2016 Jul 8, 2024
30a3db6
add C tests for cosine similarity algo
jnke2016 Jul 8, 2024
41cb9b1
update similarity tests
jnke2016 Jul 8, 2024
f9cafa1
update MG similarity CAPI tests
jnke2016 Jul 8, 2024
1aec8e4
fix style
jnke2016 Jul 8, 2024
89e4da1
update tests for python SG similarity algos
jnke2016 Jul 8, 2024
40eb10e
fix style
jnke2016 Jul 8, 2024
17b285e
fix OOM issue
jnke2016 Jul 8, 2024
6c182f4
update tests
jnke2016 Jul 8, 2024
ba220b6
fix style
jnke2016 Jul 8, 2024
3bb7f94
Merge remote-tracking branch 'upstream/branch-24.08' into branch-24.0…
jnke2016 Jul 10, 2024
060fbee
update copyright
jnke2016 Jul 10, 2024
dd79258
Merge remote-tracking branch 'upstream/branch-24.08' into branch-24.0…
jnke2016 Jul 13, 2024
7592382
update branch
jnke2016 Jul 25, 2024
3385243
update copyright
jnke2016 Jul 25, 2024
fa1c858
update docstrings to indicate that weight support
jnke2016 Jul 25, 2024
f7647e7
fix typo
jnke2016 Jul 25, 2024
57f4c4b
update docstrings description
jnke2016 Jul 25, 2024
4a843f8
remove debug statement
jnke2016 Jul 26, 2024
756d32b
fix typo
jnke2016 Jul 29, 2024
6159b96
Merge branch 'branch-24.08' into branch-24.08_all-pairs-similarity
nv-rliu Jul 29, 2024
853824c
Merge branch 'branch-24.08' into branch-24.08_all-pairs-similarity
rlratzel Jul 30, 2024
a6971c3
Merge branch 'branch-24.08' into branch-24.08_all-pairs-similarity
jnke2016 Jul 30, 2024
c3fc90f
Merge branch 'branch-24.08' into branch-24.08_all-pairs-similarity
jnke2016 Jul 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions cpp/include/cugraph_c/similarity_algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,34 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_
cugraph_similarity_result_t** result,
cugraph_error_t** error);

/**
* @brief Perform cosine similarity computation
*
* Compute the cosine similarity for the specified vertex_pairs. The coefficients are
* returned through the opaque @p result object.
*
* Note that cosine similarity must run on a symmetric graph.
*
* @param [in] handle Handle for accessing resources
* @param [in] graph Pointer to graph
* @param [in] vertex_pairs Vertex pairs for which the similarity is computed
* @param [in] use_weight If true consider the edge weight in the graph, if false use an
* edge weight of 1. Passing true for a graph that was created
* without edge weights is rejected with CUGRAPH_INVALID_INPUT.
* @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to
* `true`).
* @param [out] result Opaque pointer to similarity results
* @param [out] error Pointer to an error object storing details of any error. Will
* be populated if error code is not CUGRAPH_SUCCESS
* @return error code
*/
cugraph_error_code_t cugraph_cosine_similarity_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_vertex_pairs_t* vertex_pairs,
bool_t use_weight,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error);

/**
* @brief Perform All-Pairs Jaccard similarity computation
*
Expand Down Expand Up @@ -259,6 +287,44 @@ cugraph_error_code_t cugraph_all_pairs_overlap_coefficients(
cugraph_similarity_result_t** result,
cugraph_error_t** error);

/**
* @brief Perform All Pairs cosine similarity computation
*
* Compute the similarity for all vertex pairs derived from the two-hop neighbors
* of an optional specified vertex list. This function will identify the two-hop
* neighbors of the specified vertices (all vertices in the graph if not specified)
* and compute similarity for those vertices.
*
* If topk is set to a value other than SIZE_MAX then the result will only contain
* the top k highest scoring results.
*
* Note that cosine similarity must run on a symmetric graph.
*
* @param [in] handle Handle for accessing resources
* @param [in] graph Pointer to graph
* @param [in] vertices Vertex list for input. If null then compute based on
* all vertices in the graph.
* @param [in] use_weight If true consider the edge weight in the graph, if false use an
* edge weight of 1. Passing true for a graph that was created
* without edge weights is rejected with CUGRAPH_INVALID_INPUT.
* @param [in] topk Specify how many answers to return. Specifying SIZE_MAX
* will return all values.
* @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to
* `true`).
* @param [out] result Opaque pointer to similarity results
* @param [out] error Pointer to an error object storing details of any error. Will
* be populated if error code is not CUGRAPH_SUCCESS
* @return error code
*/
cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* vertices,
bool_t use_weight,
size_t topk,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error);

#ifdef __cplusplus
}
#endif
115 changes: 115 additions & 0 deletions cpp/src/c_api/similarity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,22 @@ struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor {
: std::nullopt,
topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt);

cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
handle_,
v1.data(),
v1.size(),
number_map->data(),
graph_view.vertex_partition_range_lasts(),
false);

cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
handle_,
v2.data(),
v2.size(),
number_map->data(),
graph_view.vertex_partition_range_lasts(),
false);

result_ = new cugraph::c_api::cugraph_similarity_result_t{
new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients,
graph_->weight_type_),
Expand Down Expand Up @@ -274,6 +290,33 @@ struct sorensen_functor {
}
};

struct cosine_functor {
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> operator()(
raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs)
{
return cugraph::cosine_similarity_coefficients(
handle, graph_view, edge_weight_view, vertex_pairs);
}

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<weight_t>>
operator()(raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::optional<raft::device_span<vertex_t const>> vertices,
std::optional<size_t> topk)
{
return cugraph::cosine_similarity_all_pairs_coefficients(
handle, graph_view, edge_weight_view, vertices, topk);
}
};

struct overlap_functor {
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> operator()(
Expand All @@ -300,6 +343,33 @@ struct overlap_functor {
}
};

// Functor passed by the C API cosine entry points to the generic similarity
// dispatchers. It carries no state; each call operator simply forwards to the
// matching cosine routine in the cugraph namespace.
struct cosine_similarity_functor {
  // Similarity for an explicit list of vertex pairs (two parallel vertex spans).
  template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
  rmm::device_uvector<weight_t> operator()(
    raft::handle_t const& handle,
    cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
    std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
    std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs)
  {
    auto coefficients =
      cugraph::cosine_similarity_coefficients(handle, graph_view, edge_weight_view, vertex_pairs);
    return coefficients;
  }

  // All-pairs similarity, optionally restricted to `vertices` and to the `topk`
  // results; the result is the (first vertex, second vertex, coefficient) triple
  // produced by cugraph::cosine_similarity_all_pairs_coefficients.
  template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
  std::tuple<rmm::device_uvector<vertex_t>,
             rmm::device_uvector<vertex_t>,
             rmm::device_uvector<weight_t>>
  operator()(raft::handle_t const& handle,
             cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
             std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
             std::optional<raft::device_span<vertex_t const>> vertices,
             std::optional<size_t> topk)
  {
    auto all_pairs_result = cugraph::cosine_similarity_all_pairs_coefficients(
      handle, graph_view, edge_weight_view, vertices, topk);
    return all_pairs_result;
  }
};

} // namespace

extern "C" cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarity(
Expand Down Expand Up @@ -391,6 +461,28 @@ extern "C" cugraph_error_code_t cugraph_overlap_coefficients(
return cugraph::c_api::run_algorithm(graph, functor, result, error);
}

// C API entry point: cosine similarity for caller-supplied vertex pairs.
// Rejects the request with CUGRAPH_INVALID_INPUT when weights are requested but
// the graph carries none; otherwise runs similarity_functor with the cosine
// functor through the generic run_algorithm dispatcher.
extern "C" cugraph_error_code_t cugraph_cosine_similarity_coefficients(
  const cugraph_resource_handle_t* handle,
  cugraph_graph_t* graph,
  const cugraph_vertex_pairs_t* vertex_pairs,
  bool_t use_weight,
  bool_t do_expensive_check,
  cugraph_similarity_result_t** result,
  cugraph_error_t** error)
{
  if (use_weight) {
    // The opaque C handle is the internal graph object; weights live on it.
    auto* internal_graph = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph);

    CAPI_EXPECTS(internal_graph->edge_weights_ != nullptr,
                 CUGRAPH_INVALID_INPUT,
                 "use_weight is true but edge weights are not provided.",
                 *error);
  }

  similarity_functor cosine_pairs_op(
    handle, graph, vertex_pairs, cosine_similarity_functor{}, use_weight, do_expensive_check);

  return cugraph::c_api::run_algorithm(graph, cosine_pairs_op, result, error);
}

extern "C" cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
Expand Down Expand Up @@ -459,3 +551,26 @@ extern "C" cugraph_error_code_t cugraph_all_pairs_overlap_coefficients(

return cugraph::c_api::run_algorithm(graph, functor, result, error);
}

// C API entry point: all-pairs cosine similarity.
//
// Validates that edge weights exist when `use_weight` is set (otherwise the call
// is rejected with CUGRAPH_INVALID_INPUT), then runs all_pairs_similarity_functor
// through the generic run_algorithm dispatcher. `vertices` may be null and `topk`
// may be SIZE_MAX, as described in the public header.
extern "C" cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients(
  const cugraph_resource_handle_t* handle,
  cugraph_graph_t* graph,
  const cugraph_type_erased_device_array_view_t* vertices,
  bool_t use_weight,
  size_t topk,
  bool_t do_expensive_check,
  cugraph_similarity_result_t** result,
  cugraph_error_t** error)
{
  if (use_weight) {
    CAPI_EXPECTS(
      reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_weights_ != nullptr,
      CUGRAPH_INVALID_INPUT,
      "use_weight is true but edge weights are not provided.",
      *error);
  }

  // Must dispatch on the cosine functor: passing overlap_functor here (as the
  // overlap entry point does) would silently return overlap coefficients from
  // an API that promises cosine similarity.
  all_pairs_similarity_functor functor(
    handle, graph, vertices, cosine_similarity_functor{}, use_weight, topk, do_expensive_check);

  return cugraph::c_api::run_algorithm(graph, functor, result, error);
}
Loading
Loading