main.py · 272 lines (238 loc) · 10.9 KB
import argparse
import datetime
import logging
import os
import random
import string
import warnings

import torch
from torch_geometric import seed_everything
from torch_geometric.graphgym.config import cfg, dump_cfg, set_agg_dir, set_cfg, load_cfg, makedirs_rm_exist
from torch_geometric.graphgym.loader import create_loader
from torch_geometric.graphgym.logger import set_printing
from torch_geometric.graphgym.model_builder import create_model
from torch_geometric.graphgym.optim import create_optimizer, create_scheduler, OptimizerConfig
from torch_geometric.graphgym.register import train_dict
from torch_geometric.graphgym.train import train
from torch_geometric.graphgym.utils.agg_runs import agg_runs
from torch_geometric.graphgym.utils.comp_budget import params_count
from torch_geometric.graphgym.utils.device import auto_select_device
from yacs.config import CfgNode as CN

import gssc  # noqa, registers custom modules
from gssc.finetuning import load_pretrained_model_cfg, init_model_from_pretrained
from gssc.logger import create_logger
from gssc.optimizer.extra_optimizers import ExtendedSchedulerConfig


def parse_args() -> argparse.Namespace:
r"""Parses the command line arguments."""
parser = argparse.ArgumentParser(description="GraphGym")
parser.add_argument("--cfg", dest="cfg_file", type=str, required=True, help="The configuration file path.")
parser.add_argument("--repeat", type=int, default=1, help="The number of repeated jobs.")
parser.add_argument("--mark_done", action="store_true", help="Mark yaml as done after a job has finished.")
parser.add_argument(
"opts", default=None, nargs=argparse.REMAINDER, help="See graphgym/config.py for remaining options."
)
parser.add_argument("--log_code", default=1, type=int)
parser.add_argument("--device", required=True, type=str)
parser.add_argument("--seed", default=None, type=int)
parser.add_argument("--name_tag", default=None, type=str)
parser.add_argument("--max_epoch", default=None, type=int)
parser.add_argument("--batch_size", default=None, type=int)
parser.add_argument("--dim_hidden", default=None, type=int)
parser.add_argument("--times_func", default=None, type=str)
parser.add_argument("--dim_pe", default=None, type=int)
parser.add_argument("--loss_fun", default=None, type=str)
parser.add_argument("--dname", default=None, type=str)
parser.add_argument("--act", default=None, type=str)
parser.add_argument("--jk", default=None, type=int)
parser.add_argument("--weight_decay", default=None, type=float)
parser.add_argument("--base_lr", default=None, type=float)
parser.add_argument("--min_lr", default=None, type=float)
parser.add_argument("--reduce_factor", default=None, type=float)
parser.add_argument("--schedule_patience", default=None, type=int)
parser.add_argument("--max_freqs", default=None, type=int)
parser.add_argument("--num_warmup_epochs", default=None, type=int)
parser.add_argument("--node_encoder_name", default=None, type=str)
parser.add_argument("--layer_type", default=None, type=str)
parser.add_argument("--layers", default=None, type=int)
parser.add_argument("--init_pe_dim", default=None, type=int)
parser.add_argument("--dropout_res", default=None, type=float)
parser.add_argument("--dropout_ff", default=None, type=float)
parser.add_argument("--dropout_local", default=None, type=float)
parser.add_argument("--more_mapping", default=None, type=int)
parser.add_argument("--reweigh_self", default=None, type=int)
return parser.parse_args()
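
# Illustrative invocation (hypothetical paths and values, not prescribed by this repo):
#   python main.py --cfg configs/example.yaml --device 0 --repeat 3 --seed 42
# Trailing "opts" tokens are forwarded to GraphGym and merged into the config as
# KEY VALUE pairs, e.g. `optim.max_epoch 200`.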


def def_extra_param():
    """Register the custom `cfg.extra` options with their default values."""
    cfg.extra = CN()
cfg.extra.init_pe_dim = 32
cfg.extra.dropout_res = 0.0
cfg.extra.dropout_ff = 0.0
cfg.extra.dropout_local = 0.0
cfg.extra.check_if_pe_done = 0
cfg.extra.more_mapping = 1
cfg.extra.reweigh_self = 1
cfg.extra.jk = 0
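
# These extra options can be overridden from the YAML config like any other cfg
# field; a minimal sketch (hypothetical values):
#   extra:
#     init_pe_dim: 64
#     dropout_res: 0.1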


def new_optimizer_config(cfg):
    """Bundle the optimizer-related options from cfg into an OptimizerConfig."""
return OptimizerConfig(
optimizer=cfg.optim.optimizer,
base_lr=cfg.optim.base_lr,
weight_decay=cfg.optim.weight_decay,
momentum=cfg.optim.momentum,
)


def new_scheduler_config(cfg):
    """Bundle the scheduler-related options from cfg into an ExtendedSchedulerConfig."""
return ExtendedSchedulerConfig(
scheduler=cfg.optim.scheduler,
steps=cfg.optim.steps,
lr_decay=cfg.optim.lr_decay,
max_epoch=cfg.optim.max_epoch,
reduce_factor=cfg.optim.reduce_factor,
schedule_patience=cfg.optim.schedule_patience,
min_lr=cfg.optim.min_lr,
num_warmup_epochs=cfg.optim.num_warmup_epochs,
train_mode=cfg.train.mode,
eval_period=cfg.train.eval_period,
)


def custom_set_out_dir(cfg, cfg_fname, name_tag):
    """Set the custom main output directory path in cfg.

    Include the config filename and name_tag in the new :obj:`cfg.out_dir`.

    Args:
        cfg (CfgNode): Configuration node
        cfg_fname (string): Filename of the yaml configuration file
        name_tag (string): Additional name tag to identify this execution of the
            configuration file, specified in :obj:`cfg.name_tag`
    """
run_name = os.path.splitext(os.path.basename(cfg_fname))[0]
run_name += f"-{name_tag}" if name_tag else ""
cfg.out_dir = os.path.join(cfg.out_dir, run_name)
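
# For example (illustrative values): cfg_fname="configs/zinc.yaml" with
# name_tag="run1" yields cfg.out_dir = "<previous out_dir>/zinc-run1".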


def custom_set_run_dir(cfg, run_id):
    """Set the custom output directory for a single experiment run.

    Args:
        cfg (CfgNode): Configuration node
        run_id (int): Main for-loop iteration id (the random seed or dataset split index)
    """
cfg.run_dir = os.path.join(cfg.out_dir, str(run_id))
# Make output directory
if cfg.train.auto_resume:
os.makedirs(cfg.run_dir, exist_ok=True)
else:
makedirs_rm_exist(cfg.run_dir)
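
# For example (illustrative): with cfg.out_dir = "results/zinc-run1" and
# run_id = 0, per-run outputs are written to "results/zinc-run1/0".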


def run_loop_settings():
    """Create main loop execution settings based on the current cfg.

    Configures the main execution loop to run in one of two modes:

    1. 'multi-seed' - Reproduces the default GraphGym behaviour, where
       args.repeat controls how many times the experiment run is repeated.
       Each iteration uses a random seed incremented from the previous one,
       starting at the initial cfg.seed.

    2. 'multi-split' - Executes the experiment run over multiple dataset
       splits, which can be multiple CV splits or multiple standard splits.
       The random seed is reset to the initial cfg.seed value for each run
       iteration.

    Returns:
        List of run IDs for each loop iteration
        List of rng seeds to loop over
        List of dataset split indices to loop over
    """
if len(cfg.run_multiple_splits) == 0:
# 'multi-seed' run mode
num_iterations = args.repeat
seeds = [cfg.seed + x for x in range(num_iterations)]
split_indices = [cfg.dataset.split_index] * num_iterations
run_ids = seeds
else:
# 'multi-split' run mode
if args.repeat != 1:
raise NotImplementedError("Running multiple repeats of multiple " "splits in one run is not supported.")
num_iterations = len(cfg.run_multiple_splits)
seeds = [cfg.seed] * num_iterations
split_indices = cfg.run_multiple_splits
run_ids = split_indices
return run_ids, seeds, split_indices
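
# A minimal sketch of the two modes (assumed values, for illustration only;
# s = cfg.dataset.split_index):
#   multi-seed:  cfg.seed=0, args.repeat=3, cfg.run_multiple_splits=[]
#                -> run_ids=[0, 1, 2], seeds=[0, 1, 2], split_indices=[s, s, s]
#   multi-split: cfg.seed=0, cfg.run_multiple_splits=[0, 1, 2]
#                -> run_ids=[0, 1, 2], seeds=[0, 0, 0], split_indices=[0, 1, 2]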


def update_cfg_using_args(cfg, args):
    """Override cfg entries with matching, non-None command line arguments."""
def update_dict(d, args):
for key, value in d.items():
if isinstance(value, dict):
update_dict(d[key], args)
else:
if key in args and getattr(args, key) is not None and key not in ["layers", "dname"]:
d[key] = getattr(args, key)
update_dict(cfg, args)
if cfg["name_tag"] == "random":
cfg["name_tag"] = "".join(random.choices(string.ascii_uppercase + string.digits, k=5))
cfg["log_code"] = args.log_code
if args.layers is not None:
cfg.gt.layers = args.layers
if args.dname is not None:
cfg.dataset.name = args.dname
cfg.gnn.dim_inner = cfg.gt.dim_hidden
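
# Example (illustrative): passing `--base_lr 0.001` updates cfg.optim.base_lr via
# the recursive update_dict above, while `--layers` and `--dname` are mapped
# explicitly to cfg.gt.layers and cfg.dataset.name.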


if __name__ == "__main__":
warnings.filterwarnings("ignore")
# Load cmd line args
args = parse_args()
device = args.device
def_extra_param()
# Load config file
set_cfg(cfg)
load_cfg(cfg, args)
update_cfg_using_args(cfg, args)
custom_set_out_dir(cfg, args.cfg_file, cfg.name_tag)
dump_cfg(cfg)
# Set Pytorch environment
torch.set_num_threads(cfg.num_threads)
# Repeat for multiple experiment runs
for run_id, seed, split_index in zip(*run_loop_settings()):
# Set configurations for each run
custom_set_run_dir(cfg, run_id)
set_printing()
cfg.dataset.split_index = split_index
cfg.seed = seed
cfg.run_id = run_id
seed_everything(cfg.seed)
cfg.device = f"cuda:{device}"
if cfg.pretrained.dir:
cfg = load_pretrained_model_cfg(cfg)
logging.info(f"[*] Run ID {run_id}: seed={cfg.seed}, " f"split_index={cfg.dataset.split_index}")
logging.info(f" Starting now: {datetime.datetime.now()}")
# Set machine learning pipeline
loaders = create_loader()
loggers = create_logger()
        # custom_train expects three loggers, one each for 'train', 'valid' and 'test'.
        # GraphGym creates one logger/loader per 'train_mask'-style attribute in the
        # dataset, so as a workaround we create one logger per split type here.
        # The loaders are read-only at this point, so simply duplicating them is safe.
if cfg.dataset.name == "ogbn-arxiv" or cfg.dataset.name == "ogbn-proteins":
loggers_2 = create_logger()
loggers_3 = create_logger()
loggers_2[0].name = "val"
loggers_3[0].name = "test"
loggers.extend(loggers_2)
loggers.extend(loggers_3)
loaders = loaders * 3
model = create_model()
if cfg.pretrained.dir:
model = init_model_from_pretrained(
model, cfg.pretrained.dir, cfg.pretrained.freeze_main, cfg.pretrained.reset_prediction_head
)
optimizer = create_optimizer(model.parameters(), new_optimizer_config(cfg))
scheduler = create_scheduler(optimizer, new_scheduler_config(cfg))
# Print model info
logging.info(model)
logging.info(cfg)
cfg.params = params_count(model)
logging.info("Num parameters: %s", cfg.params)
# Start training
if cfg.train.mode == "standard":
if cfg.wandb.use:
logging.warning(
"[W] WandB logging is not supported with the " "default train.mode, set it to `custom`"
)
train(loggers, loaders, model, optimizer, scheduler)
else:
train_dict[cfg.train.mode](loggers, loaders, model, optimizer, scheduler)
# Aggregate results from different seeds
try:
agg_runs(cfg.out_dir, cfg.metric_best)
except Exception as e:
logging.info(f"Failed when trying to aggregate multiple runs: {e}")
# When being launched in batch mode, mark a yaml as done
if args.mark_done:
os.rename(args.cfg_file, f"{args.cfg_file}_done")
logging.info(f"[*] All done: {datetime.datetime.now()}")