forked from dvysardana/RecommenderSystems_PyData_2016
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Evaluation.py
126 lines (93 loc) · 5.08 KB
/
Evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#Class to calculate precision and recall
import random
class precision_recall_calculator():
    """Compute average precision and recall at cutoffs N = 1..10 for two
    recommenders: a popularity model (``pm``) and an item-similarity model
    (``is_model``).

    Both models must expose ``recommend(user_id)`` returning a mapping/frame
    whose ``"song"`` entry is an ordered sequence of recommended songs.
    ``test_data`` and ``train_data`` are pandas-like frames with 'user_id'
    and 'song' columns.
    """

    def __init__(self, test_data, train_data, pm, is_model):
        self.test_data = test_data
        self.train_data = train_data
        # BUG FIX: was `self.user_test_sample` (singular "user"), but every
        # other method uses `self.users_test_sample` — initialize the name
        # that is actually read.
        self.users_test_sample = None
        self.model1 = pm        # popularity model
        self.model2 = is_model  # item-similarity model
        self.ism_training_dict = dict()  # user_id -> ranked songs (item similarity)
        self.pm_training_dict = dict()   # user_id -> ranked songs (popularity)
        self.test_dict = dict()          # user_id -> set of songs seen in test data

    # Method to return a random percentage of values from a list
    def remove_percentage(self, list_a, percentage):
        """Return a random sample containing `percentage` of `list_a`.

        Sampling is without replacement and seeded (seed 0) so results are
        reproducible across runs.
        """
        k = int(len(list_a) * percentage)
        random.seed(0)
        indices = random.sample(range(len(list_a)), k)
        new_list = [list_a[i] for i in indices]
        return new_list

    # Create a test sample of users for use in calculating precision and recall
    def create_user_test_sample(self, percentage):
        """Sample `percentage` of the users common to train and test sets."""
        # Find users common between training and test set
        users_test_and_training = list(
            set(self.test_data['user_id'].unique()).intersection(
                set(self.train_data['user_id'].unique())))
        print("Length of user_test_and_training:%d" % len(users_test_and_training))
        # Take only a random user_sample of users for evaluations
        self.users_test_sample = self.remove_percentage(users_test_and_training, percentage)
        print("Length of user sample:%d" % len(self.users_test_sample))

    # Method to generate recommendations for users in the user test sample
    def get_test_sample_recommendations(self):
        """Fill the per-user recommendation dicts and the test ground truth."""
        for user_id in self.users_test_sample:
            print("Getting recommendations for user:%s" % user_id)
            # Ranked items for user_id from the item-similarity model
            user_sim_items = self.model2.recommend(user_id)
            self.ism_training_dict[user_id] = list(user_sim_items["song"])
            # Ranked items for user_id from the popularity model
            user_sim_items = self.model1.recommend(user_id)
            self.pm_training_dict[user_id] = list(user_sim_items["song"])
            # Ground truth: songs this user actually has in the test data
            test_data_user = self.test_data[self.test_data['user_id'] == user_id]
            self.test_dict[user_id] = set(test_data_user['song'].unique())

    # Method to calculate the precision and recall measures
    def calculate_precision_recall(self):
        """Average precision@N and recall@N over the user sample, N = 1..10.

        Returns:
            (pm_precision, pm_recall, ism_precision, ism_recall) — four
            lists, each indexed by N-1.
        """
        # Cutoff list for precision and recall calculation
        cutoff_list = list(range(1, 11))
        ism_avg_precision_list = []
        ism_avg_recall_list = []
        pm_avg_precision_list = []
        pm_avg_recall_list = []
        num_users_sample = len(self.users_test_sample)
        for N in cutoff_list:
            ism_sum_precision = 0
            ism_sum_recall = 0
            pm_sum_precision = 0
            pm_sum_recall = 0
            for user_id in self.users_test_sample:
                ism_hitset = self.test_dict[user_id].intersection(
                    set(self.ism_training_dict[user_id][0:N]))
                pm_hitset = self.test_dict[user_id].intersection(
                    set(self.pm_training_dict[user_id][0:N]))
                testset = self.test_dict[user_id]
                # precision@N = |hits| / N ; recall@N = |hits| / |test set|
                pm_sum_precision += float(len(pm_hitset)) / float(N)
                pm_sum_recall += float(len(pm_hitset)) / float(len(testset))
                # BUG FIX: the original swapped the ism denominators —
                # precision was divided by |testset| and recall by N,
                # the opposite of the (correct) pm computation above.
                ism_sum_precision += float(len(ism_hitset)) / float(N)
                ism_sum_recall += float(len(ism_hitset)) / float(len(testset))
            ism_avg_precision_list.append(ism_sum_precision / float(num_users_sample))
            ism_avg_recall_list.append(ism_sum_recall / float(num_users_sample))
            pm_avg_precision_list.append(pm_sum_precision / float(num_users_sample))
            pm_avg_recall_list.append(pm_sum_recall / float(num_users_sample))
        return (pm_avg_precision_list, pm_avg_recall_list,
                ism_avg_precision_list, ism_avg_recall_list)

    # A wrapper method to calculate all the evaluation measures
    def calculate_measures(self, percentage):
        """Sample users, generate recommendations, and return the metrics
        from calculate_precision_recall()."""
        # Create a test sample of users
        self.create_user_test_sample(percentage)
        # Generate recommendations for the test sample users
        self.get_test_sample_recommendations()
        # Calculate precision and recall at different cutoff values
        # for the popularity model (pm) and the item-similarity model (ism)
        return self.calculate_precision_recall()