Skip to content

Commit

Permalink
Fix HITS convergence error. (#4043)
Browse files Browse the repository at this point in the history
Our tests call HITS with epsilon set to 1e-6 or 1e-8. HITS internally uses the maximum norm to normalize the HITS values after each iteration, before computing the change in HITS values between two consecutive iterations. With this normalization, the sum of HITS values tends to grow with the number of vertices, so using a fixed epsilon leads to convergence failure in large graphs.

This PR updates HITS to compare the sum of the HITS value changes between two consecutive iterations against `epsilon` * graph_view.number_of_vertices(), following the networkx documentation (https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_analysis.hits_alg.hits.html).

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Joseph Nke (https://github.com/jnke2016)
  - Naim (https://github.com/naimnv)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: #4043
  • Loading branch information
seunghwak authored Dec 6, 2023
1 parent a5718c6 commit 65df1a2
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 28 deletions.
3 changes: 2 additions & 1 deletion cpp/src/link_analysis/hits_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ std::tuple<result_t, size_t> hits(raft::handle_t const& handle,
if (num_vertices == 0) { return std::make_tuple(diff_sum, final_iteration_count); }

CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative.");
auto tolerance = static_cast<result_t>(graph_view.number_of_vertices()) * epsilon;

// Check validity of initial guess if supplied
if (has_initial_hubs_guess && do_expensive_check) {
Expand Down Expand Up @@ -171,7 +172,7 @@ std::tuple<result_t, size_t> hits(raft::handle_t const& handle,
std::swap(prev_hubs, curr_hubs);
iter++;

if (diff_sum < epsilon) {
if (diff_sum < tolerance) {
break;
} else if (iter >= max_iterations) {
CUGRAPH_FAIL("HITS failed to converge.");
Expand Down
6 changes: 3 additions & 3 deletions cpp/tests/c_api/hits_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ int test_hits()
weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0};
weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136};

double epsilon = 0.0001;
double epsilon = 0.00002;
size_t max_iterations = 20;

// hits wants store_transposed = TRUE
Expand Down Expand Up @@ -195,7 +195,7 @@ int test_hits_with_transpose()
weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0};
weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136};

double epsilon = 0.0001;
double epsilon = 0.00002;
size_t max_iterations = 20;

// Hits wants store_transposed = TRUE
Expand Down Expand Up @@ -232,7 +232,7 @@ int test_hits_with_initial()
vertex_t h_initial_vertices[] = {0, 1, 2, 3, 4};
weight_t h_initial_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608};

double epsilon = 0.0001;
double epsilon = 0.00002;
size_t max_iterations = 20;

return generic_hits_test(h_src,
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/c_api/mg_hits_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ int test_hits(const cugraph_resource_handle_t* handle)
weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0};
weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136};

double epsilon = 0.0001;
double epsilon = 0.00002;
size_t max_iterations = 20;

// hits wants store_transposed = TRUE
Expand Down Expand Up @@ -203,7 +203,7 @@ int test_hits_with_transpose(const cugraph_resource_handle_t* handle)
weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0};
weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136};

double epsilon = 0.0001;
double epsilon = 0.00002;
size_t max_iterations = 20;

// Hits wants store_transposed = TRUE
Expand Down
28 changes: 16 additions & 12 deletions cpp/tests/link_analysis/hits_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,11 @@ std::tuple<std::vector<result_t>, std::vector<result_t>, double, size_t> hits_re
size_t max_iterations,
std::optional<result_t const*> starting_hub_values,
bool normalized,
double tolerance)
double epsilon)
{
CUGRAPH_EXPECTS(num_vertices > 1, "number of vertices expected to be non-zero");
auto tolerance = static_cast<result_t>(num_vertices) * epsilon;

std::vector<result_t> prev_hubs(num_vertices, result_t{1.0} / num_vertices);
std::vector<result_t> prev_authorities(num_vertices, result_t{1.0} / num_vertices);
std::vector<result_t> curr_hubs(num_vertices);
Expand Down Expand Up @@ -127,8 +129,8 @@ std::tuple<std::vector<result_t>, std::vector<result_t>, double, size_t> hits_re
}

struct Hits_Usecase {
bool check_correctness{true};
bool check_initial_input{false};
bool check_correctness{true};
};

template <typename input_usecase_t>
Expand Down Expand Up @@ -175,8 +177,8 @@ class Tests_Hits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, inpu
// 3. run hits

auto graph_view = graph.view();
auto maximum_iterations = 500;
weight_t tolerance = 1e-5;
auto maximum_iterations = 200;
weight_t epsilon = 1e-7;
rmm::device_uvector<weight_t> d_hubs(graph_view.local_vertex_partition_range_size(),
handle.get_stream());

Expand All @@ -201,7 +203,7 @@ class Tests_Hits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, inpu
graph_view,
d_hubs.data(),
d_authorities.data(),
tolerance,
epsilon,
maximum_iterations,
hits_usecase.check_initial_input,
true,
Expand Down Expand Up @@ -232,7 +234,7 @@ class Tests_Hits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, inpu
(hits_usecase.check_initial_input) ? std::make_optional(initial_random_hubs.data())
: std::nullopt,
true,
tolerance);
epsilon);

std::vector<weight_t> h_cugraph_hits{};
if (renumber) {
Expand All @@ -246,8 +248,7 @@ class Tests_Hits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, inpu
handle.sync_stream();
auto threshold_ratio = 1e-3;
auto threshold_magnitude =
(1.0 / static_cast<weight_t>(graph_view.number_of_vertices())) *
threshold_ratio; // skip comparison for low hits vertices (lowly ranked vertices)
1e-6; // skip comparison for low hits vertices (lowly ranked vertices)
auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
return std::abs(lhs - rhs) <=
std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
Expand Down Expand Up @@ -294,14 +295,17 @@ INSTANTIATE_TEST_SUITE_P(
Tests_Hits_File,
::testing::Combine(
// enable correctness checks
::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}),
::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"),
cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"),
cugraph::test::File_Usecase("test/datasets/dolphins.mtx"))));

INSTANTIATE_TEST_SUITE_P(rmat_small_test,
Tests_Hits_Rmat,
// enable correctness checks
::testing::Combine(::testing::Values(Hits_Usecase{true, false},
::testing::Combine(::testing::Values(Hits_Usecase{false, true},
Hits_Usecase{true, true}),
::testing::Values(cugraph::test::Rmat_Usecase(
10, 16, 0.57, 0.19, 0.19, 0, false, false))));
Expand All @@ -315,7 +319,7 @@ INSTANTIATE_TEST_SUITE_P(
Tests_Hits_File,
::testing::Combine(
// disable correctness checks
::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{false, true}),
::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))));

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -327,7 +331,7 @@ INSTANTIATE_TEST_SUITE_P(
Tests_Hits_Rmat,
// disable correctness checks for large graphs
::testing::Combine(
::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{false, true}),
::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}),
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false))));

CUGRAPH_TEST_PROGRAM_MAIN()
18 changes: 8 additions & 10 deletions cpp/tests/link_analysis/mg_hits_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
#include <gtest/gtest.h>

struct Hits_Usecase {
bool check_correctness{true};
bool check_initial_input{false};
bool check_correctness{true};
};

template <typename input_usecase_t>
Expand Down Expand Up @@ -81,7 +81,7 @@ class Tests_MGHits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, in
auto mg_graph_view = mg_graph.view();

auto maximum_iterations = 200;
weight_t tolerance = 1e-8;
weight_t epsilon = 1e-7;
rmm::device_uvector<weight_t> d_mg_hubs(mg_graph_view.local_vertex_partition_range_size(),
handle_->get_stream());

Expand Down Expand Up @@ -110,7 +110,7 @@ class Tests_MGHits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, in
mg_graph_view,
d_mg_hubs.data(),
d_mg_authorities.data(),
tolerance,
epsilon,
maximum_iterations,
hits_usecase.check_initial_input,
true,
Expand Down Expand Up @@ -205,7 +205,7 @@ class Tests_MGHits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, in
sg_graph_view,
d_sg_hubs.data(),
d_sg_authorities.data(),
tolerance,
epsilon,
maximum_iterations,
hits_usecase.check_initial_input,
true,
Expand All @@ -218,9 +218,7 @@ class Tests_MGHits : public ::testing::TestWithParam<std::tuple<Hits_Usecase, in

auto threshold_ratio = 1e-3;
auto threshold_magnitude =
(1.0 / static_cast<result_t>(mg_graph_view.number_of_vertices())) *
threshold_ratio; // skip comparison for low Hits verties (lowly ranked
// vertices)
1e-6; // skip comparison for low Hits vertices (lowly ranked vertices)
auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
return std::abs(lhs - rhs) <
std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
Expand Down Expand Up @@ -274,7 +272,7 @@ INSTANTIATE_TEST_SUITE_P(
Tests_MGHits_File,
::testing::Combine(
// enable correctness checks
::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}),
::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"),
Expand All @@ -285,7 +283,7 @@ INSTANTIATE_TEST_SUITE_P(
Tests_MGHits_Rmat,
::testing::Combine(
// enable correctness checks
::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}),
::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}),
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false))));

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -297,7 +295,7 @@ INSTANTIATE_TEST_SUITE_P(
Tests_MGHits_Rmat,
::testing::Combine(
// disable correctness checks for large graphs
::testing::Values(Hits_Usecase{false, false}),
::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}),
::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false))));

CUGRAPH_MG_TEST_PROGRAM_MAIN()

0 comments on commit 65df1a2

Please sign in to comment.