05_train_network.py
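"""
05_train_network.py

Train the analysis classifiers:
  - classifier 1 (-c1): alternative signal vs. SM signal (parameterized in theta)
  - classifier 2 (-c2): alternative signal vs. background (parameterized in theta)
  - classifier 3 (-c3): background vs. SM signal (non-parameterized;
    only needs to be run once per feature set)
"""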
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import os
import torch
from numba import cuda
import argparse
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from helpers.network_training import *
from helpers.utils import np_to_torch, crop_feature
parser = argparse.ArgumentParser()
# Adding optional argument
parser.add_argument("-p", "--parameter_code", help = "Which Wilson coefficient(s) you're scanning over.")
parser.add_argument("-dtype", "--dtype", help = "Data type (Delphes)", default = "delphes_s")
parser.add_argument("-rid", "--run_id", help = "run_id")
parser.add_argument("-f", "--num_features", help = "Number of features to use while training")
parser.add_argument("-n", "--network", help = "Network architecture", default = "dense")
parser.add_argument("-c1",action='store_true',help="Train classifier 1")
parser.add_argument("-c2",action='store_true',help="Train classifier 2")
parser.add_argument("-c3",action='store_true',help="Train classifier 3")
parser.add_argument("-s", "--seed", help = "Random seed", default = 0)
# Read arguments from command line
args = parser.parse_args()
# computing
#os.environ["CUDA_VISIBLE_DEVICES"]= "1"
# set the number of threads that pytorch will use
torch.set_num_threads(2)
# set gpu device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device: " + str(device), flush=True)
# workflow
import yaml
with open("workflow.yaml", "r") as file:
workflow = yaml.safe_load(file)
run_id = args.run_id
run_configs = {}
run_configs["input_precode"] = args.dtype
run_configs["parameter_code"] = args.parameter_code
run_configs["network_id"] = run_id
run_configs["seed"] = args.seed
seed = int(args.seed)
# features to train on
# in order: m_hh, pt_bb, pt_aa, deltaR_aa, deltaR_bb, a0_pt, a1_pt, b0_pt, b1_pt
# These correspond to the features (in order) in the function add_observables of 03_a_read_delphes.py
run_configs["features"] = [15, 16, 17, 10, 9, 0, 2, 5, 7][:int(args.num_features)]
run_configs["bkg.N_train"] = 10000000 # To reduce training time, if necessary
# network architecture
run_configs["network.type"] = args.network
run_configs["network.layers"] = [32, 32]
# training hyperparameters
run_configs["hyperparam.batch_size"] = 1024
run_configs["hyperparam.lr"] = 0.001
run_configs["hyperparam.n_epochs"] = 150
run_configs["hyperparam.patience_ES"] = 20
run_configs["hyperparam.patience_lr"] = 5
with open(f"run_configs/{run_id}.yml", "w") as outfile:
yaml.dump(run_configs, outfile, default_flow_style=False)
n_features = len(run_configs["features"])
print("Loading in samples from with {parameter_code}.".format(parameter_code=run_configs["parameter_code"]))
print("Running analysis on {input_precode}.".format(input_precode=run_configs["input_precode"]))
print("Analysis will use features {features}".format(features=run_configs["features"]))
print()
N_train = int(run_configs["bkg.N_train"])
samples_dir = workflow["sampling"]["output_dir"]
identity_code = run_configs["input_precode"]
features = run_configs["features"]
parameter_code = run_configs["parameter_code"]
# load in the samples
samples_SM = np.load(f'{samples_dir}/plain_real/{identity_code}/x_sm.npy')[:,features]
samples_alt = np.load(f'{samples_dir}/plain_real/{identity_code}/x_alt_{parameter_code}.npy')[:,features]
samples_bkg = np.load(f'{samples_dir}/plain_real/delphes_b0/x_bkg.npy')[:,features]
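# note: the background sample is always read from the delphes_b0 directory,
# independent of the signal identity_code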
# load in the theta values
theta_alt = np.load(f'{samples_dir}/plain_real/{identity_code}/theta_alt_{parameter_code}.npy')
theta_alt_sm = np.load(f'{samples_dir}/plain_real/{identity_code}/theta_alt_{parameter_code}.npy')
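# theta_alt_sm duplicates theta_alt, presumably so the SM samples are paired with
# the same theta values and both classes of the parameterized classifiers cover
# an identical theta distribution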
# optionally shuffle the samples, since out of the box they are grouped in chunks by their generating theta
#samples_alt, theta_alt, samples_SM, theta_alt_sm = shuffle(samples_alt, theta_alt, samples_SM, theta_alt_sm, random_state = 42)
# crop each sample to the desired number of training events
samples_SM = samples_SM[:N_train]
samples_alt = samples_alt[:N_train]
samples_bkg = samples_bkg[:N_train]
theta_alt = theta_alt[:N_train]
theta_alt_sm = theta_alt_sm[:N_train]
print("Preprocessing data...")
print()
all_data = np.vstack((samples_SM, samples_bkg))
scaler = StandardScaler()
scaler.fit(all_data)
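# the scaler is fit on SM signal + background only; the alternative-signal samples
# below are transformed with the same scaling so all classifiers see consistent inputs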
# transform
samples_SM = scaler.transform(samples_SM)
samples_alt = scaler.transform(samples_alt)
samples_bkg = scaler.transform(samples_bkg)
with open(f"models/scaler_{run_id}", "wb") as ofile:
pickle.dump(scaler, ofile)
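# The pickled scaler can be restored at inference time, e.g.:
#   with open(f"models/scaler_{run_id}", "rb") as ifile:
#       scaler = pickle.load(ifile)
#   x_new = scaler.transform(x_new)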
def train_classifier(train_set_0, train_set_1, loc_id):
    # stack the two classes and assign labels: 0 for train_set_0, 1 for train_set_1
    x_train = np.vstack([train_set_0, train_set_1])
    all_labels = np.vstack([np.zeros((train_set_0.shape[0], 1)), np.ones((train_set_1.shape[0], 1))])
    X_train, X_val, Y_train, Y_val = train_test_split(x_train, all_labels, test_size=0.2, random_state=seed)
    print(f"X_train: {X_train.shape}.\nX_val: {X_val.shape}.\nY_train: {Y_train.shape}.\nY_val: {Y_val.shape}.")
    X_train = np_to_torch(X_train, device)
    X_val = np_to_torch(X_val, device)
    Y_train = np_to_torch(Y_train, device)
    Y_val = np_to_torch(Y_val, device)
    # per-batch weight for the KL term of the BNN loss, chosen so the KL contributions
    # summed over one epoch roughly equal the full KL divergence; for the dense network
    # the same value is reused as the AdamW weight decay
    kl_weight = run_configs["hyperparam.batch_size"] / X_train.shape[0]
    if args.network == "bnn":
        dense_net = BNN(n_inputs=train_set_0.shape[1], layers=run_configs["network.layers"], prior_sigma=0.1)
        optimizer = torch.optim.AdamW(dense_net.parameters(), lr=run_configs["hyperparam.lr"], weight_decay=0)
    elif args.network == "dense":
        dense_net = NeuralNet(n_inputs=train_set_0.shape[1], layers=run_configs["network.layers"])
        optimizer = torch.optim.AdamW(dense_net.parameters(), lr=run_configs["hyperparam.lr"], weight_decay=kl_weight)
    print("Starting network training...")
    epochs, losses, losses_val = train_network(
        X_train, Y_train, X_val, Y_val, dense_net, optimizer,
        run_configs["hyperparam.n_epochs"], run_configs["hyperparam.batch_size"], device,
        seed=seed,
        train_bnn=(args.network == "bnn"),  # the KL term only applies when training the BNN
        kl_weight=kl_weight,
        network_id=f"models/{run_id}_{loc_id}",
        use_early_stop=True, min_delta=0,
        patience_ES=run_configs["hyperparam.patience_ES"],
        patience_lr=run_configs["hyperparam.patience_lr"],
        loss_type="BCE",
    )
print(f"Using {args.network} type networks.")
"""
TRAIN CLASSIFIER 1
- learn LR of alternative S (class 1) to SM S (class 0)
- this must be a parameterized classifier
"""
if args.c1:
    print("Training classifier 1...")
    # theta is appended as an extra input feature, scaled by 10, presumably to keep
    # it O(1) and comparable to the standardized kinematic features
    denom_c1 = np.c_[samples_SM, theta_alt_sm/10.0]
    numer_c1 = np.c_[samples_alt, theta_alt/10.0]
    train_classifier(denom_c1, numer_c1, "Ssm_Salt")
    print("Done with classifier 1!\n")
"""
TRAIN CLASSIFIER 2
- learn LR of alternative S (class 1) to B (class 0)
- parameterized classifier
"""
if args.c2:
    print("Training classifier 2...")
    denom_c2 = np.c_[samples_bkg, theta_alt_sm/10.0]
    numer_c2 = np.c_[samples_alt, theta_alt/10.0]
    train_classifier(denom_c2, numer_c2, "B_Salt")
    print("Done with classifier 2!\n")
"""
TRAIN CLASSIFIER 3
- learn LR of B (class 1) to SM S (class 0)
- non-parameterized classifier
- only needs to be run once for a given feature set
"""
if args.c3:
    print("Training classifier 3...")
    train_classifier(samples_SM, samples_bkg, "Ssm_B")
    print("Done with classifier 3!\n")