# run_5_95_on_recommenders.py  (136 lines, 108 loc, 5.9 KB)
# NOTE: the lines above this file's code were GitHub file-viewer chrome and a
# rendered line-number gutter left over from a web scrape; replaced with this
# provenance header so the file parses as Python.
import pandas as pd
import Recommenders.NonPersonalizedRecommender
from Recommenders.Recommender_import_list import *
from Data_manager.Movielens.Movielens1MReader import Movielens1MReader
from Data_manager.split_functions.split_train_validation_multiple_splits import split_multiple_times
from Data_manager.HMDatasetReader import HMDatasetReader
from Recommenders.Incremental_Training_Early_Stopping import Incremental_Training_Early_Stopping
from Recommenders.BaseCBFRecommender import BaseItemCBFRecommender, BaseUserCBFRecommender
from Evaluation.Evaluator import EvaluatorHoldout, EvaluatorMultipleURMs
import traceback, os
# Name of the folder inside "processed/" where the dataset was saved with
# Dataset.save_data(); consumed by the __main__ block below when loading.
dataset_name = "hm-exponential-age-clustered"
"""
Name of the folder inside processed where the dataset was saved with Dataset.save_data()
"""
def _get_instance(recommender_class, URM_train, ICM_all, UCM_all, dataset=None):
    """
    Instantiate a recommender with the constructor arguments its class family expects.

    :param recommender_class: recommender class object to instantiate
    :param URM_train: user-rating matrix used for training
    :param ICM_all: item content matrix, used only for item-CBF recommenders
    :param UCM_all: user content matrix, used only for user-CBF recommenders
    :param dataset: dataset object exposing ``AVAILABLE_URM``; defaults to the
        module-level ``dataset_object`` created in the ``__main__`` block, kept
        for backward compatibility with existing callers. Calling this function
        with ``dataset=None`` before ``__main__`` runs raises ``NameError``.
    :return: the constructed recommender instance (not yet fitted)
    """
    if dataset is None:
        # Backward-compatible fallback on the global defined under __main__.
        dataset = dataset_object

    if issubclass(recommender_class, BaseItemCBFRecommender):
        recommender_object = recommender_class(URM_train, ICM_all)
    elif issubclass(recommender_class, BaseUserCBFRecommender):
        recommender_object = recommender_class(URM_train, UCM_all)
    elif recommender_class == Recommenders.NonPersonalizedRecommender.ExplicitTopPopAgeClustered:
        print("Reading mapper for age_groups...")
        # Mapper from UserID to age group; read from the working directory.
        # NOTE(review): path is relative — presumably the script is always run
        # from the repo root; confirm.
        customer_mapper_df = pd.read_csv("customers_age_group.csv")
        customer_mapper_df.set_index("UserID", inplace=True)
        print(customer_mapper_df)
        # Every loaded URM except the held-out validation split feeds the
        # age-clustered recommender.
        list_of_URMs = [URM for URM_name, URM in dataset.AVAILABLE_URM.items()
                        if URM_name != "URM_validation"]
        recommender_object = recommender_class(list_of_URMs, dataset, customer_mapper_df)
    else:
        print(recommender_class)
        recommender_object = recommender_class(URM_train)

    return recommender_object
if __name__ == '__main__':
    reader = HMDatasetReader(False)
    # PROCESSED_PATH = os.getenv('PROCESSED_PATH')

    dataset_object = reader.load_data('{}/{}/'.format("processed", dataset_name))
    print("Loaded dataset into memory...")

    # Here all URMs and ICMs must be loaded; if no URM_all is present an error
    # will occur in the Dataset library.
    URM_test = dataset_object.get_URM_from_name('URM_validation')
    for ICM_name, ICM_object in dataset_object.get_loaded_ICM_dict().items():
        print(ICM_name)

    # Placeholders: the clustered recommender builds its own URM list from
    # dataset_object inside _get_instance, so no real training matrix or
    # content matrices are needed here.
    ICM_all = []
    UCM_all = []
    URM_train = []
    print(URM_train)
    print(URM_test.shape)

    dataset_object.print_statistics_global()

    recommender_class_list = [
        ExplicitTopPopAgeClustered
    ]

    evaluator = EvaluatorHoldout(URM_test, [5, 12], exclude_seen=True)

    # Take 5 random splits of 5% / 95% of the total validation interactions.
    list_5_splits, list_95_splits = split_multiple_times(URM_test, 5, 0.95, keep_only_test=False)

    # Evaluate on all splits at cutoffs 5 and 12.
    evaluator5 = EvaluatorMultipleURMs(list_5_splits, [5, 12])
    evaluator95 = EvaluatorMultipleURMs(list_95_splits, [5, 12])

    # Early-stopping configuration, applied only to recommenders that support
    # incremental training (see isinstance check below).
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": EvaluatorHoldout(URM_test, [20], exclude_seen=True),
        "lower_validations_allowed": 5,
        "validation_metric": "MAP",
    }

    output_root_path = "./result_experiments/"
    # Create the output directory if missing; exist_ok avoids the
    # check-then-create race of the previous os.path.exists() guard.
    os.makedirs(output_root_path, exist_ok=True)

    # Context manager guarantees the log file is closed even if an exception
    # escapes the loop (the previous open() was never closed).
    with open(output_root_path + "result_all_algorithms.txt", "a") as logFile:
        for recommender_class in recommender_class_list:
            try:
                print("Algorithm: {}".format(recommender_class))

                recommender_object = _get_instance(recommender_class, URM_train, ICM_all, UCM_all)

                if isinstance(recommender_object, Incremental_Training_Early_Stopping):
                    fit_params = {"epochs": 15, **earlystopping_keywargs}
                else:
                    fit_params = {}

                recommender_object.fit(**fit_params)

                # Full-holdout evaluation.
                results_run_1, results_run_string_1 = evaluator.evaluateRecommender(recommender_object)
                print("Algorithm: {}, results: \n{}".format(recommender_class, results_run_string_1))

                # Per-split statistics; the evaluators also cache them for the
                # print_map_statistics() calls below.
                results_5 = evaluator5.evaluate_with_statistics(recommender_object)
                results_95 = evaluator95.evaluate_with_statistics(recommender_object)

                print("Result recap on 5% splits: \n")
                evaluator5.print_map_statistics()
                print("Result recap on 95% splits: \n")
                evaluator95.print_map_statistics()

                logFile.write("Algorithm: {}, results: \n{}\n".format(recommender_class, results_run_string_1))
                # NOTE(review): this formats the evaluator object itself, not the
                # MAP@12 numbers the message promises — presumably relies on the
                # evaluator's __str__; confirm this is the intended log content.
                logFile.write("Algorithm: {}, MAP@12 results on 5% splits: {}".format(recommender_class, evaluator5))
                logFile.flush()

            except Exception as e:
                # Top-level boundary: record the failure and move on to the
                # next recommender instead of aborting the whole sweep.
                traceback.print_exc()
                logFile.write("Algorithm: {} - Exception: {}\n".format(recommender_class, str(e)))
                logFile.flush()