-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_experiments.py
86 lines (67 loc) · 2.45 KB
/
run_experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Runs a set of experiments defined in a JSON file in the settings folder.
# Specify the JSON file to run by passing its name (with or without the
# .json extension) as the first command line argument.
import json
import time
from sys import argv
from pathlib import Path
from sklearn.model_selection import StratifiedShuffleSplit
from modules import load_data
from modules import experiment_objects
from modules.evaluate import write_results
# Data directory, resolved against the current working directory; it is
# handed to the experiment data container later in the script.
DATA_DIR = Path.cwd() / "data"

# Get config values
print("Loading config values")
config_file = Path.cwd() / "configs" / "config.json"
# Decode explicitly as UTF-8 (the standard JSON encoding) rather than
# relying on the platform's default text encoding.
with open(config_file, "r", encoding="utf-8") as f:
    config = json.load(f)
# Load ratings data and trim down to selected values
print("Loading ratings data")
ratings = load_data.ratings_data()
# Trim using the minimum user/book rating counts from the config.
ratings = load_data.trim_ratings(
    ratings,
    config["min_user_ratings"],
    config["min_book_ratings"],
)
# The "recommend" column is the target used for the stratified split below.
ratings["recommend"] = load_data.set_threshold(ratings, config["threshold"])
# Constrain the positive-class proportion to the configured min/max bounds.
ratings = load_data.set_class_proportions(
    ratings,
    config["min_proportion_positive"],
    config["max_proportion_positive"],
)
n_ratings = ratings.shape[0]
print(f"Data ready: {n_ratings} ratings to use")
# Train/test split - just get the indexes for now
# Whatever is not used for training becomes the test fraction.
test_fraction = 1 - config["training_proportion"]
splitter = StratifiedShuffleSplit(
    n_splits=1,
    test_size=test_fraction,
    random_state=config["random_state"],
)
# Only one split is requested, so take the first (and only) pair of index
# arrays; stratification is on the "recommend" column.
train, test = next(splitter.split(ratings, ratings["recommend"]))
print("Test/train split done")
print(f"{len(train)} ratings in training set")
print(f"{len(test)} ratings in test set")
# Create object to hold data
# Bundles the ratings, the split indexes and the run settings so that every
# experiment is built from the same inputs.
data = experiment_objects.ExperimentData(
    ratings,
    train,
    test,
    config["training_run"],
    config["random_state"],
    DATA_DIR,
)
# Load the experiment configuration from json
print("Preparing experiments")
# Fail with a usage message instead of a bare IndexError when the
# experiments file name was not supplied on the command line.
if len(argv) < 2:
    raise SystemExit("Usage: python run_experiments.py <experiments_file[.json]>")
exp_file = argv[1]
# The ".json" extension is optional on the command line.
if not exp_file.endswith(".json"):
    exp_file += ".json"
experiments_file = Path.cwd() / "settings" / exp_file
# Decode explicitly as UTF-8 (the standard JSON encoding) rather than
# relying on the platform's default text encoding.
with open(experiments_file, "r", encoding="utf-8") as f:
    experiments = json.load(f)
# Run the experiments
# Each entry of `experiments` is unpacked as keyword arguments to Experiment
# and must contain a "description" key, which is used in the progress output.
# NOTE(review): the if/else nesting below is reconstructed; retrain() and
# evaluate_test() are assumed to belong to the non-training branch - confirm.
for e in experiments:
    description = e["description"]
    print(f"Now running: {description}")
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    running = experiment_objects.Experiment(data=data, **e)
    running.process_folds()
    if config["training_run"]:
        # Training run: hybridise and evaluate on the folds.
        running.hybridise_folds()
        running.evaluate_folds()
    else:
        # Test run: hybridise, retrain, then evaluate on the test set.
        running.hybridise_test()
        running.retrain()
        running.evaluate_test()
    elapsed = time.perf_counter() - start
    write_results(config, running, elapsed)
    print(f"Completed {description}")
    print(f"Time taken: {round(elapsed, 2)} seconds")
    print()
print("All experiments completed")