Skip to content

Commit

Permalink
improve ir_eval unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
smassung committed Apr 6, 2015
1 parent eee7dfe commit a05841f
Showing 1 changed file with 127 additions and 23 deletions.
150 changes: 127 additions & 23 deletions src/test/ir_eval_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,137 @@ namespace meta
namespace testing
{

int ir_eval_tests()
void check_query(index::ir_eval& eval,
const std::vector<std::pair<doc_id, double>>& ranking,
query_id qid, double e_f1, double e_p, double e_r,
double e_avg_p, double e_ndcg,
uint64_t num_docs = std::numeric_limits<uint64_t>::max())
{
system("rm -rf ceeaus-inv");
create_config("file");
auto idx = index::make_index<index::inverted_index, caching::splay_cache>(
"test-config.toml", uint32_t{10000});
index::okapi_bm25 ranker;
index::ir_eval eval{"test-config.toml"};
auto f1 = eval.f1(ranking, qid, num_docs);
auto p = eval.precision(ranking, qid, num_docs);
auto r = eval.recall(ranking, qid, num_docs);
auto avg_p = eval.avg_p(ranking, qid, num_docs);
auto ndcg = eval.ndcg(ranking, qid, num_docs);
ASSERT_APPROX_EQUAL(f1, e_f1);
ASSERT_APPROX_EQUAL(p, e_p);
ASSERT_APPROX_EQUAL(r, e_r);
ASSERT_APPROX_EQUAL(avg_p, e_avg_p);
ASSERT_APPROX_EQUAL(ndcg, e_ndcg);
}

/**
 * Runs the "ir-eval-bounds" test: scores five queries with okapi_bm25 and
 * asserts that every ir_eval metric, plus the aggregate MAP and gMAP, lies
 * in the closed interval [0, 1].
 *
 * @return whatever testing::run_test returns for this test (presumably the
 * number of failures, since the caller sums it -- confirm against run_test)
 */
int ir_eval_bounds()
{
    return testing::run_test(
        "ir-eval-bounds", [&]()
        {
            // start from a clean slate before building the index
            system("rm -rf ceeaus-inv");
            create_config("file");
            auto idx = index::make_index<index::inverted_index,
                                         caching::splay_cache>(
                "test-config.toml", uint32_t{10000});
            index::okapi_bm25 ranker;
            index::ir_eval eval{"test-config.toml"};
            // sanity test bounds
            for (size_t i = 0; i < 5; ++i)
            {
                // the document at index i is used as the text of query i
                corpus::document query{idx->doc_path(doc_id{i}), doc_id{0}};
                auto ranking = ranker.score(*idx, query);
                auto f1 = eval.f1(ranking, query_id{i});
                auto p = eval.precision(ranking, query_id{i});
                auto r = eval.recall(ranking, query_id{i});
                auto avg_p = eval.avg_p(ranking, query_id{i});
                auto ndcg = eval.ndcg(ranking, query_id{i});
                ASSERT(f1 >= 0 && f1 <= 1);
                ASSERT(p >= 0 && p <= 1);
                ASSERT(r >= 0 && r <= 1);
                ASSERT(avg_p >= 0 && avg_p <= 1);
                ASSERT(ndcg >= 0 && ndcg <= 1);
            }

            // the aggregate scores must be bounded as well
            auto map = eval.map();
            auto gmap = eval.gmap();
            ASSERT(map >= 0 && map <= 1);
            ASSERT(gmap >= 0 && gmap <= 1);
            // remove the files this test created
            system("rm -rf ceeaus-inv test-config.toml");
        });
}

int ir_eval_results()
{
return testing::run_test(
"ir-eval-results", [&]()
{
corpus::document query{idx->doc_path(doc_id{i}), doc_id{0}};
auto ranking = ranker.score(*idx, query);
double f1 = eval.f1(ranking, query_id{i});
double p = eval.precision(ranking, query_id{i});
double r = eval.recall(ranking, query_id{i});
ASSERT(f1 >= 0 && f1 <= 1);
ASSERT(p >= 0 && p <= 1);
ASSERT(r >= 0 && r <= 1);
}
});

system("rm -rf ceeaus-inv test-config.toml");
create_config("file");
index::ir_eval eval{"test-config.toml"};
ASSERT_APPROX_EQUAL(eval.map(), 0.0);
ASSERT_APPROX_EQUAL(eval.gmap(), 0.0);

// make some fake results based on the loaded qrels file
std::vector<std::pair<doc_id, double>> results;
query_id qid{0};
auto idcg_5 = 1.0 + 1.0 / std::log2(3.0) + 1.0 / std::log2(4.0)
+ 1.0 / std::log2(5.0) + 1.0 / std::log2(6.0);
auto idcg = idcg_5 + 1.0 / std::log2(7.0) + 1.0 / std::log2(8.0)
+ 1.0 / std::log2(9.0) + 1.0 / std::log2(10.0)
+ 1.0 / std::log2(11.0);

results.emplace_back(doc_id{0}, 1.0); // relevant
check_query(eval, results, qid, 0.2 / 1.1, 1, 0.1, 0.1, 1.0 / idcg);
check_query(eval, results, qid, 0.2 / 1.1, 1, 0.1, 0.2,
1.0 / idcg_5, 5);

results.emplace_back(doc_id{2}, 0.9); // not relevant
check_query(eval, results, qid, 0.1 / 0.6, 0.5, 0.1, 0.1,
1.0 / idcg);
check_query(eval, results, qid, 0.1 / 0.6, 0.5, 0.1, 0.2,
1.0 / idcg_5, 5);

results.emplace_back(doc_id{1}, 0.8); // relevant
check_query(eval, results, qid,
(2.0 * (2.0 / 3.0) * 0.2) / (2.0 / 3.0 + 0.2),
2.0 / 3.0, 0.2, 1.0 / 6.0, 1.5 / idcg);
check_query(eval, results, qid,
(2.0 * (2.0 / 3.0) * 0.2) / (2.0 / 3.0 + 0.2),
2.0 / 3.0, 0.2, 1.0 / 3.0, 1.5 / idcg_5, 5);

results.emplace_back(doc_id{30}, 0.8); // relevant
results.emplace_back(doc_id{6}, 0.7); // relevant
results.emplace_back(doc_id{43}, 0.6); // relevant
results.emplace_back(doc_id{24}, 0.5); // relevant
results.emplace_back(doc_id{34}, 0.4); // relevant
results.emplace_back(doc_id{35}, 0.3); // relevant
results.emplace_back(doc_id{38}, 0.2); // relevant
results.emplace_back(doc_id{754}, 0.1); // relevant
auto avg_p_5 = (1.0 + 2.0 / 3.0 + 3.0 / 4.0 + 4.0 / 5.0 + 5.0 / 6.0)
/ 5.0;
auto avg_p = (1.0 + 2.0 / 3.0 + 3.0 / 4.0 + 4.0 / 5.0 + 5.0 / 6.0
+ 6.0 / 7.0 + 7.0 / 8.0 + 8.0 / 9.0 + 9.0 / 10.0
+ 10.0 / 11.0) / 10.0;
auto dcg_5 = 1.0 + 1.0 / std::log2(4.0) + 1.0 / std::log2(5.0)
+ 1.0 / std::log2(6.0); // 4 terms, 1 zero term
auto dcg = dcg_5 + 1.0 / std::log2(7.0) + 1.0 / std::log2(8.0)
+ 1.0 / std::log2(9.0) + 1.0 / std::log2(10.0)
+ 1.0 / std::log2(11.0) + 1.0 / std::log2(12.0);
check_query(eval, results, qid,
(2.0 * (10.0 / 11.0)) / ((10.0 / 11.0) + 1.0),
10.0 / 11.0, 1.0, avg_p, dcg / idcg);
check_query(eval, results, qid,
(2.0 * (4.0 / 5.0) * 0.4) / ((4.0 / 5.0) + 0.4),
4.0 / 5.0, 0.4, avg_p_5, dcg_5 / idcg_5, 5);

results.erase(results.begin() + 1); // remove non-relevant result
check_query(eval, results, qid, 1.0, 1.0, 1.0, 1.0, 1.0);
// recall is still not perfect @5
check_query(eval, results, qid, 1.0 / 1.5, 1.0, 0.5, 1.0, 1.0, 5);
system("rm test-config.toml");
});
}

/**
 * Entry point for the ir_eval unit tests: runs the bounds test and then the
 * results test.
 *
 * @return the sum of the values returned by the two tests
 */
int ir_eval_tests()
{
    // separate statements keep the bounds test running before the results
    // test; the operands of a single `+` would be unsequenced
    int failures = ir_eval_bounds();
    failures += ir_eval_results();
    return failures;
}
}
Expand Down

0 comments on commit a05841f

Please sign in to comment.