Skip to content

Commit

Permalink
Expose new all-pairs Similarity algorithms (#4502)
Browse files Browse the repository at this point in the history
A variation of the Similarity algorithms that computes all-pairs similarity between vertices is available in the C and C++ APIs. It also enables retrieval of the top-k highest-scoring results.

This PR:
- Exposes the new all-pairs Similarity algorithms to the PLC and Python APIs
- Adds SG and MG Python tests
- Adds docstrings with examples

closes #4470

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Ralph Liu (https://github.com/nv-rliu)
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #4502
  • Loading branch information
jnke2016 authored Jul 31, 2024
1 parent a0b112b commit 8f7fec9
Show file tree
Hide file tree
Showing 31 changed files with 4,631 additions and 150 deletions.
66 changes: 66 additions & 0 deletions cpp/include/cugraph_c/similarity_algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,34 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_
cugraph_similarity_result_t** result,
cugraph_error_t** error);

/**
 * @brief Perform cosine similarity computation
 *
 * Compute the cosine similarity coefficient for each of the specified vertex pairs.
 *
 * Note that cosine similarity must run on a symmetric graph.
 *
 * @param [in]  handle              Handle for accessing resources
 * @param [in]  graph               Pointer to graph
 * @param [in]  vertex_pairs        Vertex pairs for which the similarity is computed
 * @param [in]  use_weight          If true consider the edge weight in the graph, if false use an
 *                                  edge weight of 1. If true and the graph was created without
 *                                  edge weights, the call fails with CUGRAPH_INVALID_INPUT.
 * @param [in]  do_expensive_check  A flag to run expensive checks for input arguments (if set to
 *                                  `true`).
 * @param [out] result              Opaque pointer to similarity results
 * @param [out] error               Pointer to an error object storing details of any error. Will
 *                                  be populated if error code is not CUGRAPH_SUCCESS
 * @return error code
 */
cugraph_error_code_t cugraph_cosine_similarity_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_vertex_pairs_t* vertex_pairs,
bool_t use_weight,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error);

/**
* @brief Perform All-Pairs Jaccard similarity computation
*
Expand Down Expand Up @@ -259,6 +287,44 @@ cugraph_error_code_t cugraph_all_pairs_overlap_coefficients(
cugraph_similarity_result_t** result,
cugraph_error_t** error);

/**
 * @brief Perform All-Pairs cosine similarity computation
 *
 * Compute the similarity for all vertex pairs derived from the two-hop neighbors
 * of an optional specified vertex list. This function will identify the two-hop
 * neighbors of the specified vertices (all vertices in the graph if not specified)
 * and compute similarity for those vertices.
 *
 * If the topk parameter is specified then the result will only contain the top k
 * highest scoring results.
 *
 * Note that cosine similarity must run on a symmetric graph.
 *
 * @param [in]  handle              Handle for accessing resources
 * @param [in]  graph               Pointer to graph
 * @param [in]  vertices            Vertex list for input. If null then compute based on
 *                                  all vertices in the graph.
 * @param [in]  use_weight          If true consider the edge weight in the graph, if false use an
 *                                  edge weight of 1. If true and the graph was created without
 *                                  edge weights, the call fails with CUGRAPH_INVALID_INPUT.
 * @param [in]  topk                Specify how many answers to return. Specifying SIZE_MAX
 *                                  disables the limit and returns all values.
 * @param [in]  do_expensive_check  A flag to run expensive checks for input arguments (if set to
 *                                  `true`).
 * @param [out] result              Opaque pointer to similarity results
 * @param [out] error               Pointer to an error object storing details of any error. Will
 *                                  be populated if error code is not CUGRAPH_SUCCESS
 * @return error code
 */
cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
const cugraph_type_erased_device_array_view_t* vertices,
bool_t use_weight,
size_t topk,
bool_t do_expensive_check,
cugraph_similarity_result_t** result,
cugraph_error_t** error);

#ifdef __cplusplus
}
#endif
115 changes: 115 additions & 0 deletions cpp/src/c_api/similarity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,22 @@ struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor {
: std::nullopt,
topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt);

cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
handle_,
v1.data(),
v1.size(),
number_map->data(),
graph_view.vertex_partition_range_lasts(),
false);

cugraph::unrenumber_int_vertices<vertex_t, multi_gpu>(
handle_,
v2.data(),
v2.size(),
number_map->data(),
graph_view.vertex_partition_range_lasts(),
false);

result_ = new cugraph::c_api::cugraph_similarity_result_t{
new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients,
graph_->weight_type_),
Expand Down Expand Up @@ -274,6 +290,33 @@ struct sorensen_functor {
}
};

struct cosine_functor {
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> operator()(
raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs)
{
return cugraph::cosine_similarity_coefficients(
handle, graph_view, edge_weight_view, vertex_pairs);
}

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<weight_t>>
operator()(raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::optional<raft::device_span<vertex_t const>> vertices,
std::optional<size_t> topk)
{
return cugraph::cosine_similarity_all_pairs_coefficients(
handle, graph_view, edge_weight_view, vertices, topk);
}
};

struct overlap_functor {
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> operator()(
Expand All @@ -300,6 +343,33 @@ struct overlap_functor {
}
};

struct cosine_similarity_functor {
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> operator()(
raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs)
{
return cugraph::cosine_similarity_coefficients(
handle, graph_view, edge_weight_view, vertex_pairs);
}

template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<weight_t>>
operator()(raft::handle_t const& handle,
cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<cugraph::edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
std::optional<raft::device_span<vertex_t const>> vertices,
std::optional<size_t> topk)
{
return cugraph::cosine_similarity_all_pairs_coefficients(
handle, graph_view, edge_weight_view, vertices, topk);
}
};

} // namespace

extern "C" cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarity(
Expand Down Expand Up @@ -391,6 +461,28 @@ extern "C" cugraph_error_code_t cugraph_overlap_coefficients(
return cugraph::c_api::run_algorithm(graph, functor, result, error);
}

/**
 * C API entry point: cosine similarity for an explicit list of vertex pairs.
 *
 * Rejects the call with CUGRAPH_INVALID_INPUT (and populates *error) when
 * use_weight is set but the graph carries no edge weights; otherwise builds a
 * similarity_functor around cosine_similarity_functor and dispatches it through
 * cugraph::c_api::run_algorithm, whose error code is returned.
 */
extern "C" cugraph_error_code_t cugraph_cosine_similarity_coefficients(
  const cugraph_resource_handle_t* handle,
  cugraph_graph_t* graph,
  const cugraph_vertex_pairs_t* vertex_pairs,
  bool_t use_weight,
  bool_t do_expensive_check,
  cugraph_similarity_result_t** result,
  cugraph_error_t** error)
{
  auto* graph_impl = reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph);

  // Short-circuit keeps the graph from being inspected when weights are not requested.
  bool const weights_available = !use_weight || (graph_impl->edge_weights_ != nullptr);
  CAPI_EXPECTS(weights_available,
               CUGRAPH_INVALID_INPUT,
               "use_weight is true but edge weights are not provided.",
               *error);

  similarity_functor cosine_runner(
    handle, graph, vertex_pairs, cosine_similarity_functor{}, use_weight, do_expensive_check);

  return cugraph::c_api::run_algorithm(graph, cosine_runner, result, error);
}

extern "C" cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients(
const cugraph_resource_handle_t* handle,
cugraph_graph_t* graph,
Expand Down Expand Up @@ -459,3 +551,26 @@ extern "C" cugraph_error_code_t cugraph_all_pairs_overlap_coefficients(

return cugraph::c_api::run_algorithm(graph, functor, result, error);
}

/**
 * C API entry point: all-pairs cosine similarity.
 *
 * Rejects the call with CUGRAPH_INVALID_INPUT (and populates *error) when
 * use_weight is set but the graph carries no edge weights. Otherwise wraps the
 * arguments in an all_pairs_similarity_functor and dispatches it through
 * cugraph::c_api::run_algorithm, whose error code is returned.
 *
 * @param handle              Handle for accessing resources
 * @param graph               Pointer to graph
 * @param vertices            Optional seed vertex list (may be null)
 * @param use_weight          Whether to use the graph's edge weights
 * @param topk                Maximum number of results; SIZE_MAX means no limit
 * @param do_expensive_check  Whether to run expensive input validation
 * @param result              Receives the opaque similarity result
 * @param error               Receives error details on failure
 */
extern "C" cugraph_error_code_t cugraph_all_pairs_cosine_similarity_coefficients(
  const cugraph_resource_handle_t* handle,
  cugraph_graph_t* graph,
  const cugraph_type_erased_device_array_view_t* vertices,
  bool_t use_weight,
  size_t topk,
  bool_t do_expensive_check,
  cugraph_similarity_result_t** result,
  cugraph_error_t** error)
{
  if (use_weight) {
    CAPI_EXPECTS(
      reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->edge_weights_ != nullptr,
      CUGRAPH_INVALID_INPUT,
      "use_weight is true but edge weights are not provided.",
      *error);
  }

  // Must be the cosine functor: passing overlap_functor here (as the original
  // did) makes this entry point silently return overlap coefficients.
  all_pairs_similarity_functor functor(
    handle, graph, vertices, cosine_similarity_functor{}, use_weight, topk, do_expensive_check);

  return cugraph::c_api::run_algorithm(graph, functor, result, error);
}
Loading

0 comments on commit 8f7fec9

Please sign in to comment.