Skip to content

Commit

Permalink
Renamed query_term_count to query_term_weight
Browse files Browse the repository at this point in the history
  • Loading branch information
hazimehh committed Jun 1, 2015
1 parent bef7d46 commit 9d03b06
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 7 deletions.
4 changes: 2 additions & 2 deletions include/index/score_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ struct score_data

/// doc term id
term_id t_id;
/// query term count
uint64_t query_term_count;
/// query term count (or weight in case of feedback)
double query_term_weight;
/// number of docs that t_id appears in
uint64_t doc_count;
/// number of times t_id appears in corpus
Expand Down
98 changes: 98 additions & 0 deletions include/index/score_data.h~
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/**
* @file score_data.h
* @author Sean Massung
*
* All files in META are dual-licensed under the MIT and NCSA licenses. For more
* details, consult the file LICENSE.mit and LICENSE.ncsa in the root of the
* project.
*/

#ifndef META_SCORE_DATA_H_
#define META_SCORE_DATA_H_

#include "meta.h"

// Forward declarations of the two types that score_data (declared later in
// this header) stores references to.
namespace meta
{

namespace corpus
{
/// type of the query referenced by score_data::query
class document;
}

namespace index
{
/// type of the index referenced by score_data::idx
class inverted_index;
}
}

namespace meta
{
namespace index
{

/**
 * A score_data object contains information needed to evaluate a ranking
 * function. Data is set by the base ranker class as needed, so the derived
 * ranking classes don't make many unnecessary calls to the inverted index.
 *
 * The constructor fills in only the index-wide ("general info") members.
 * The term-based and document-based members carry default member
 * initializers so that they never hold an indeterminate value; callers are
 * expected to overwrite them before scoring.
 */
struct score_data
{
    // general info

    /// index queries are running on
    inverted_index& idx;
    /// average document length
    double avg_dl;
    /// total number of documents
    uint64_t num_docs;
    /// total number of terms in the index
    uint64_t total_terms;
    /// the current query
    const corpus::document& query;

    // term-based info (not set by the constructor; defaults apply until
    // the caller assigns them)

    /// doc term id (value-initialized by default)
    term_id t_id{};
    /// query term count
    /// NOTE(review): the sibling score_data.h touched by the same commit
    /// declares this as `double query_term_weight`; this copy still has the
    /// integral count -- confirm whether this file should be kept at all.
    uint64_t query_term_count{0};
    /// number of docs that t_id appears in
    uint64_t doc_count{0};
    /// number of times t_id appears in corpus
    uint64_t corpus_term_count{0};

    // document-based info (not set by the constructor; defaults apply until
    // the caller assigns them)

    /// document id (value-initialized by default)
    doc_id d_id{};
    /// number of times the term appears in the current doc
    uint64_t doc_term_count{0};
    /// total number of terms in the doc
    uint64_t doc_size{0};
    /// number of unique terms in the doc
    uint64_t doc_unique_terms{0};

    /**
     * Constructor to initialize the general (index-wide) elements. The
     * term-based and document-based members keep their default member
     * initializers.
     * @param p_idx The index that is being used
     * @param p_avg_dl The average doc length in the index
     * @param p_num_docs The number of docs in the index
     * @param p_total_terms The total number of terms in the index
     * @param p_query The current query
     */
    score_data(inverted_index& p_idx, double p_avg_dl, uint64_t p_num_docs,
               uint64_t p_total_terms, const corpus::document& p_query)
        : idx(p_idx), // gcc no non-const ref init from brace init list
          avg_dl{p_avg_dl},
          num_docs{p_num_docs},
          total_terms{p_total_terms},
          query(p_query) // gcc no non-const ref init from brace init list
    {
        /* nothing */
    }
};
}
}

#endif
2 changes: 1 addition & 1 deletion src/index/ranker/lm_ranker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ double language_model_ranker::score_one(const score_data& sd)
double ps = smoothed_prob(sd);
double pc = static_cast<double>(sd.corpus_term_count) / sd.total_terms;

return sd.query_term_count * std::log(ps / (doc_constant(sd) * pc));
return sd.query_term_weight * std::log(ps / (doc_constant(sd) * pc));
}

double language_model_ranker::initial_score(const score_data& sd) const
Expand Down
4 changes: 2 additions & 2 deletions src/index/ranker/okapi_bm25.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ double okapi_bm25::score_one(const score_data& sd)
/ ((k1_ * ((1.0 - b_) + b_ * doc_len / sd.avg_dl))
+ sd.doc_term_count);

double QTF = ((k3_ + 1.0) * sd.query_term_count)
/ (k3_ + sd.query_term_count);
double QTF = ((k3_ + 1.0) * sd.query_term_weight)
/ (k3_ + sd.query_term_weight);

return TF * IDF * QTF;
}
Expand Down
2 changes: 1 addition & 1 deletion src/index/ranker/pivoted_length.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ double pivoted_length::score_one(const score_data& sd)
double norm = (1 - s_) + s_ * (doc_len / sd.avg_dl);
double IDF = log((sd.num_docs + 1) / (0.5 + sd.doc_count));

return TF / norm * sd.query_term_count * IDF;
return TF / norm * sd.query_term_weight * IDF;
}

template <>
Expand Down
2 changes: 1 addition & 1 deletion src/index/ranker/ranker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ ranker::score(inverted_index& idx, corpus::document& query,
auto pdata = idx.search_primary(t_id);
sd.doc_count = pdata->counts().size();
sd.t_id = t_id;
sd.query_term_count = tpair.second;
sd.query_term_weight = tpair.second;
sd.corpus_term_count = idx.total_num_occurences(sd.t_id);
for (auto& dpair : pdata->counts())
{
Expand Down

0 comments on commit 9d03b06

Please sign in to comment.