# qual.py (forked from o19s/search-metrics)
# Search quality metrics: reciprocal rank, ERR, DCG, NDCG, and a rank-change "damage" score.

def rr(grades, n=0):
    """
    Reciprocal Rank is the first step in calculating Mean Reciprocal Rank
    https://en.wikipedia.org/wiki/Mean_reciprocal_rank
    This is a measure of how far down a results list a user has to go to find the first relevant document. A
    document is relevant if its grade is > 0.
    :param grades: A list of numbers indicating how relevant the corresponding document was at that position in the list
    :param n: A number indicating how far down the list to go before giving up (0 means consider the whole list)
    :return: A number between 0.0 and 1.0: the reciprocal of the position of the first relevant document, or 0.0
             if none is found within the first n positions
    """
    if n == 0:
        n = len(grades)
    n = min(n, len(grades))
    for i in range(0, n):
        if grades[i] > 0.0:
            return float(1.0 / (i + 1.0))
    return 0.0
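
# A minimal worked example of rr; the grade lists are invented for illustration,
# not taken from the original repo:
#
#   rr([0, 0, 3, 0])    # first relevant doc is at position 3, so this returns 1/3 ~= 0.333
#   rr([2, 0, 0])       # the first position is relevant, returns 1.0
#   rr([0, 0, 0])       # nothing relevant, returns 0.0
#   rr([0, 0, 3], n=2)  # the relevant doc sits past the cutoff, returns 0.0
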
def gain(grade, maxGrade=4.0):
    # Exponential gain used by ERR: the probability that a doc with this grade satisfies
    # the user; a grade of 0 maps to 0.0, a grade of maxGrade maps to just under 1.0
    return (2**grade - 1.0) / (2 ** maxGrade)


# A linear alternative gain:
#def gain(grade, maxGrade=4.0):
#    return (grade / maxGrade)
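
# A quick sanity check of the gain mapping with the default maxGrade of 4.0
# (values computed by hand, shown only for illustration):
#
#   gain(0)  == 0.0        # (2**0 - 1) / 2**4 = 0/16
#   gain(2)  == 0.1875     # (2**2 - 1) / 2**4 = 3/16
#   gain(4)  == 0.9375     # (2**4 - 1) / 2**4 = 15/16
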
def err(grades, n=0):
    """ Algorithm from: http://olivier.chapelle.cc/pub/err.pdf
        Expected Reciprocal Rank gets at the probability that a user will find something
        useful before getting annoyed.
        It uses a cascading model: the user scans down the list, and each result satisfies
        them with a probability given by its gain. Useful stuff has a probability near 1 of
        satisfying the user, so a highly relevant result near the top satisfies most users
        early and leaves little credit for anything ranked below it.
        Grades are assumed to be on a 0-4 scale (see gain).
    :param grades: A list of numbers indicating how relevant the corresponding document was at that position in the list
    :param n: A number indicating the maximum number of positions to consider
    :return: A number between 0.0 and 1.0 indicating how good the results were (higher is better)
    """
    if n == 0:
        n = len(grades)
    n = min(n, len(grades))
    ERR = 0
    trustBank = 1  # In a way the user's "trustBank"
    for i in range(0, n):
        r = i + 1
        pThisUseful = gain(grades[i], maxGrade=4.0)
        discThisUseful = pThisUseful / r   # How much of this user's trustBank you're about
                                           # to spend: good stuff at r=1 spends a lot of it
                                           # (and earns a lot of ERR), bad stuff at r=1
                                           # barely touches it
        ERR += trustBank * discThisUseful  # Sum the user's remaining trustBank at pos'n r
        trustBank *= (1 - pThisUseful)     # Reduce the user's trustBank
        # print("ERR@%s trust? %s trustBank? %s" % (r, ERR, trustBank))
    return ERR
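
# A small worked example of err, using made-up grade lists (not from the original repo).
# With the default 0-4 grading scale, a perfect doc has gain 15/16 = 0.9375:
#
#   err([4, 0])  # == 0.9375  -- the top result satisfies almost every user immediately
#   err([0, 4])  # == 0.46875 -- the same doc one position lower earns roughly half the credit
#
# Pushing the only good result down the list cuts ERR sharply, which is exactly the
# cascading-model behaviour the paper describes.
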
def damage(results1, results2, at=10):
    """ How "damaging" could the change from results1 -> results2 be?
        (results1 and results2 are lists of document identifiers)

        For each result in results1[:at]:
            Is the result in results2[:at]?
                If so, how far has it moved?
                If not, consider it a move of at+1 (out of the window)
            damage += discount(idx) * moveDist
    """
    from math import log

    def discount(idx):
        return 1.0 / log(idx + 2)

    idx = 0
    dmg = 0.0
    if len(results1) < at:
        at = len(results1)
    for result in results1[:at]:
        movedToIdx = at + 1  # out of the window
        if result in results2[:at]:
            movedToIdx = results2.index(result)
        moveDist = abs(movedToIdx - idx)
        dmg += discount(idx) * moveDist
        idx += 1
    return dmg
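
# A small worked example of damage, with made-up doc id lists (not from the original repo).
# discount(idx) uses a natural log, so position 0 weighs 1/ln(2) ~= 1.44 and position 1
# weighs 1/ln(3) ~= 0.91:
#
#   damage(["a", "b", "c"], ["a", "b", "c"])  # == 0.0, nothing moved
#   damage(["a", "b", "c"], ["b", "a", "c"])  # ~= 2.35: "a" and "b" each moved one spot,
#                                             #          so 1/ln(2) + 1/ln(3)
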
def dcg(grades, n=0):
    """
    Discounted Cumulative Gain
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Discounted_Cumulative_Gain
    A metric that varies directly with the average judgement of the result set, placing more weight toward the top.
    :param grades: A list of numbers indicating how relevant the corresponding document was at that position in the list
    :param n: A number indicating the maximum number of positions to consider
    :return: A number >= 0.0 indicating how the result set should be judged
    """
    if n == 0:
        n = len(grades)
    n = min(n, len(grades))
    from math import log
    dcg = 0
    for i in range(0, n):
        r = i + 1
        dcg += grades[i] / log((r + 1), 2.0)
    return dcg
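
# A quick worked example of dcg with an invented grade list (hand-computed, for illustration):
#
#   dcg([3, 2, 0])  # == 3/log2(2) + 2/log2(3) + 0/log2(4)
#                   # == 3.0 + 1.26... + 0.0  ~= 4.26
#
# Note this is the "traditional" DCG formulation (grade / log2(rank + 1)), not the
# exponential 2**grade - 1 variant.
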
def dcgWConfs(grades, confs, midGrade=2.0, n=0):
    """
    DCG where each grade comes with a confidence. Low-confidence grades are pulled toward
    midGrade before being discounted, so unreliable judgements move the score less than
    trusted ones.
    :param grades: A list of numbers indicating how relevant the corresponding document was at that position in the list
    :param confs: A list of confidences (nominally 0.0-1.0), one per grade
    :param midGrade: The grade a judgement collapses to when its confidence is 0
    :param n: A number indicating the maximum number of positions to consider
    """
    if len(grades) != len(confs):
        raise ValueError("dcgWConfs needs same number of grades as confs")
    if n == 0:
        n = len(grades)
    n = min(n, len(grades))
    from math import log
    dcg = 0
    for i in range(0, n):
        # Conf-adjusted grade: if confidence is low we don't have good information
        # on the 'true' grade, so it gets pushed toward a 2. The true DCG is a bell curve...
        # This is similar to the unbiased RankSVM technique in Joachims' paper
        r = (i + 1)
        confAdjustedGrade = midGrade + confs[i] * (grades[i] - midGrade)
        # print("Grade %s Conf Adjusted %s" % (grades[i], confAdjustedGrade))
        dcg += (confAdjustedGrade / log((r + 1), 2.0))
    return dcg
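
# A small worked example of dcgWConfs with invented grades and confidences (hand-computed):
#
#   dcgWConfs([4, 0], [1.0, 0.5])
#   # position 1: conf 1.0 keeps the grade at 4        -> 4 / log2(2) == 4.0
#   # position 2: conf 0.5 pulls 0 halfway toward 2.0  -> 1 / log2(3) ~= 0.63
#   # total ~= 4.63
#
# Compare dcg([4, 0]) == 4.0: the shaky judgement at position 2 adds a little credit
# instead of being trusted at face value.
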
def ndcg(grades, rels, n=0):
    """
    Normalized Discounted Cumulative Gain
    https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG
    A metric that considers the sort order of the rated documents against an ideal sort order with higher rated docs
    at the top and lower rated docs at the bottom.
    :param grades: A list of numbers indicating how relevant the corresponding document was at that position in the list
    :param rels: A list of numbers containing all of the relevance judgements, for computing the ideal DCG. May be just the non-zero values.
    :param n: A number indicating the maximum number of positions to consider
    :return: A number between 0.0 and 1.0 indicating how close to the ideal ordering the docs are (higher is better)
    """
    if n == 0:
        n = len(grades)
    n = min(n, len(grades))
    _dcg = dcg(grades, n=n)
    _idcg = dcg(sorted(rels, reverse=True), n=n)
    if _idcg > 0.0:
        return _dcg / _idcg
    else:
        return 0.0
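
# A worked example of ndcg with an invented judged result list (hand-computed):
#
#   ndcg([0, 3, 2], [3, 2, 0])  # dcg([0, 3, 2]) ~= 2.89, ideal dcg([3, 2, 0]) ~= 4.26,
#                               # so ndcg ~= 0.68
#   ndcg([3, 2, 0], [3, 2, 0])  # == 1.0, the list is already in ideal order
#
# The block below is a minimal smoke test, not part of the original module; the grade and
# doc id lists are invented and the expected values were computed by hand from the
# formulas above.
if __name__ == "__main__":
    print("rr        ", rr([0, 0, 3, 0]))                          # ~0.333
    print("err       ", err([4, 0]), err([0, 4]))                  # 0.9375, 0.46875
    print("damage    ", damage(["a", "b", "c"], ["b", "a", "c"]))  # ~2.35
    print("dcg       ", dcg([3, 2, 0]))                            # ~4.26
    print("dcgWConfs ", dcgWConfs([4, 0], [1.0, 0.5]))             # ~4.63
    print("ndcg      ", ndcg([0, 3, 2], [3, 2, 0]))                # ~0.68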