diff --git a/configs/experiment/routing/am-critic.yaml b/configs/experiment/routing/am-critic.yaml new file mode 100644 index 00000000..efbf3de6 --- /dev/null +++ b/configs/experiment/routing/am-critic.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +# Use the following to take the default values from am.yaml +# Replace below only the values that you want to change compared to the default values +defaults: + - routing/am.yaml + +logger: + wandb: + tags: ["am-critic", "${env.name}"] + name: am-critic-${env.name}${env.num_loc} + +model: + baseline: "critic" \ No newline at end of file diff --git a/configs/experiment/routing/am-ppo.yaml b/configs/experiment/routing/am-ppo.yaml new file mode 100644 index 00000000..766e41b8 --- /dev/null +++ b/configs/experiment/routing/am-ppo.yaml @@ -0,0 +1,50 @@ +# @package _global_ + +defaults: + - override /model: am-ppo.yaml + - override /env: tsp.yaml + - override /callbacks: default.yaml + - override /trainer: default.yaml + - override /logger: wandb.yaml + +env: + num_loc: 50 + +logger: + wandb: + project: "rl4co" + tags: ["am-ppo", "${env.name}"] + group: ${env.name}${env.num_loc} + name: am-${env.name}${env.num_loc} + +model: + batch_size: 512 + val_batch_size: 1024 + test_batch_size: 1024 + train_data_size: 1_280_000 + val_data_size: 10_000 + test_data_size: 10_000 + clip_range: 0.2 + ppo_epochs: 2 + mini_batch_size: 512 + vf_lambda: 0.5 + entropy_lambda: 0.01 + normalize_adv: False + max_grad_norm: 0.5 + optimizer_kwargs: + lr: 1e-4 + weight_decay: 0 + lr_scheduler: + "MultiStepLR" + lr_scheduler_kwargs: + milestones: [80, 95] + gamma: 0.1 + +trainer: + max_epochs: 100 + gradient_clip_val: Null # not supported in manual optimization + +seed: 1234 + +metrics: + train: ["loss", "reward", "surrogate_loss", "value_loss", "entropy_bonus"] diff --git a/configs/experiment/tsp/am.yaml b/configs/experiment/routing/am-xl.yaml similarity index 74% rename from configs/experiment/tsp/am.yaml rename to configs/experiment/routing/am-xl.yaml 
index a8c1b01a..3410f53c 100644 --- a/configs/experiment/tsp/am.yaml +++ b/configs/experiment/routing/am-xl.yaml @@ -15,11 +15,13 @@ logger: project: "rl4co" tags: ["am", "${env.name}"] group: ${env.name}${env.num_loc}" - name: "am-${env.name}${env.num_loc}" - + name: "am-xl-${env.name}${env.num_loc}" model: - batch_size: 512 + policy_kwargs: + num_encoder_layers: 6 + normalization: 'instance' + batch_size: 2048 val_batch_size: 1024 test_batch_size: 1024 train_data_size: 1_280_000 @@ -27,14 +29,15 @@ model: test_data_size: 10_000 optimizer_kwargs: lr: 1e-4 + weight_decay: 0 lr_scheduler: "MultiStepLR" lr_scheduler_kwargs: - milestones: [80, 95] + milestones: [480, 495] gamma: 0.1 trainer: - max_epochs: 100 + max_epochs: 500 seed: 1234 diff --git a/configs/experiment/routing/am.yaml b/configs/experiment/routing/am.yaml index 84488ebd..bd58316d 100644 --- a/configs/experiment/routing/am.yaml +++ b/configs/experiment/routing/am.yaml @@ -14,8 +14,8 @@ logger: wandb: project: "rl4co" tags: ["am", "${env.name}"] - group: ${env.name}${env.num_loc}" - name: "am-${env.name}${env.num_loc}" + group: ${env.name}${env.num_loc} + name: am-${env.name}${env.num_loc} model: batch_size: 512 diff --git a/configs/experiment/routing/pomo.yaml b/configs/experiment/routing/pomo.yaml new file mode 100644 index 00000000..bfbd20e8 --- /dev/null +++ b/configs/experiment/routing/pomo.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +defaults: + - override /model: pomo.yaml + - override /env: tsp.yaml + - override /callbacks: default.yaml + - override /trainer: default.yaml + - override /logger: wandb.yaml + +env: + num_loc: 50 + +logger: + wandb: + project: "rl4co" + tags: ["pomo", "${env.name}"] + group: ${env.name}${env.num_loc} + name: "pomo-${env.name}${env.num_loc}" + +model: + batch_size: 64 + train_data_size: 160_000 + val_data_size: 10_000 + test_data_size: 10_000 + optimizer_kwargs: + lr: 1e-4 + weight_decay: 1e-6 + lr_scheduler: + "MultiStepLR" + lr_scheduler_kwargs: + milestones: [95] 
+ gamma: 0.1 + +trainer: + max_epochs: 100 + +seed: 1234 + diff --git a/configs/experiment/routing/symnco.yaml b/configs/experiment/routing/symnco.yaml new file mode 100644 index 00000000..f58cb9c3 --- /dev/null +++ b/configs/experiment/routing/symnco.yaml @@ -0,0 +1,42 @@ +# @package _global_ + +defaults: + - override /model: symnco.yaml + - override /env: tsp.yaml + - override /callbacks: default.yaml + - override /trainer: default.yaml + - override /logger: wandb.yaml + +env: + num_loc: 50 + +logger: + wandb: + project: "rl4co" + tags: ["symnco", "${env.name}"] + group: ${env.name}${env.num_loc} + name: "symnco-${env.name}${env.num_loc}" + +model: + batch_size: 512 + val_batch_size: 1024 + test_batch_size: 1024 + train_data_size: 1_280_000 + val_data_size: 10_000 + test_data_size: 10_000 + num_starts: 0 # 0 for no augmentation for multi-starts + num_augment: 10 + optimizer_kwargs: + lr: 1e-4 + weight_decay: 0 + lr_scheduler: + "MultiStepLR" + lr_scheduler_kwargs: + milestones: [80, 95] + gamma: 0.1 + +trainer: + max_epochs: 100 + +seed: 1234 + diff --git a/configs/model/pomo.yaml b/configs/model/pomo.yaml index 5bbe77d9..8dae711d 100644 --- a/configs/model/pomo.yaml +++ b/configs/model/pomo.yaml @@ -1 +1,10 @@ -_target_: rl4co.models.POMO \ No newline at end of file +_target_: rl4co.models.POMO + +# During val/test only we apply dihedral transformations +num_augment: 8 + +metrics: + train: ["loss", "reward"] + val: ["reward", "max_reward", "max_aug_reward"] + test: ${metrics.val} + log_on_step: True diff --git a/configs/model/symnco.yaml b/configs/model/symnco.yaml index e0924175..ba0d92c3 100644 --- a/configs/model/symnco.yaml +++ b/configs/model/symnco.yaml @@ -1,4 +1,10 @@ _target_: rl4co.models.SymNCO -num_augment: 8 -num_starts: ${env.num_loc} \ No newline at end of file +num_augment: 10 +num_starts: 0 # by default we use only symmetric augmentations + +metrics: + train: ["loss", "loss_ss", "loss_ps", "loss_inv", "reward"] + val: ["reward", "max_reward", 
"max_aug_reward"] + test: ${metrics.val} + log_on_step: True