
added more hyperparams sets for lstms
felimomo committed Mar 5, 2024
1 parent abda692 commit ad1b25c
Showing 1 changed file with 86 additions and 18 deletions.
104 changes: 86 additions & 18 deletions hyperpars/rppo-asm2o.yml
@@ -16,24 +16,92 @@ id: "1"
repo: "cboettig/rl-ecology"
save_path: "/home/rstudio/rl4fisheries/saved_agents"

# algo hyperpars taken from:
# https://github.com/DLR-RM/rl-baselines3-zoo/blob/master/hyperparams/ppo_lstm.yml

# # BIPEDAL WALKER

# algo_config:
# # normalize: True # not clear what this one actually does -- from the source code it seems to 'activate' VecNormalize, but more care & examination needed
# policy: 'MlpLstmPolicy'
# tensorboard_log: "/home/rstudio/logs"
# n_steps: 256
# batch_size: 256
# gae_lambda: 0.95
# gamma: 0.999
# n_epochs: 10
# ent_coef: 0.0
# learning_rate: !!float 3e-4
# clip_range: 0.18
# policy_kwargs: "dict(
# ortho_init=False,
# activation_fn=torch.nn.ReLU,
# lstm_hidden_size=64,
# enable_critic_lstm=True,
# net_arch=dict(pi=[64], vf=[64])
# )"

# # HALF CHEETAH V4

algo_config:
  normalize: true
  n_envs: 1
  policy: 'MlpLstmPolicy'
  tensorboard_log: "/home/rstudio/logs"
  batch_size: 64
  n_steps: 512
  gamma: 0.98
  learning_rate: 2.0633e-05
  ent_coef: 0.000401762
  clip_range: 0.1
  n_epochs: 20
  gae_lambda: 0.92
  max_grad_norm: 0.8
  vf_coef: 0.58096
  policy_kwargs: "dict(
    log_std_init=-2,
    ortho_init=False,
    activation_fn=nn.ReLU,
    net_arch=dict(pi=[256, 256], vf=[256, 256])
  )"



# # INVERTED PENDULUM

# algo_config:
# n_envs: 8
# tensorboard_log: "/home/rstudio/logs"
# policy: 'MlpLstmPolicy'
# n_steps: 2048
# batch_size: 64
# gae_lambda: 0.95
# gamma: 0.99
# n_epochs: 10
# ent_coef: 0.0
# learning_rate: 2.5e-4
# clip_range: 0.2


# # MOUNTAIN CAR NO VEL

# algo_config:
# n_envs: 8
# tensorboard_log: "/home/rstudio/logs"
# policy: 'MlpLstmPolicy'
# batch_size: 256
# n_steps: 1024
# gamma: 0.9999
# learning_rate: !!float 7.77e-05
# ent_coef: 0.00429
# clip_range: 0.1
# n_epochs: 10
# gae_lambda: 0.9
# max_grad_norm: 5
# vf_coef: 0.19
# use_sde: True
# sde_sample_freq: 8
# policy_kwargs: "dict(log_std_init=0.0, ortho_init=False,
# lstm_hidden_size=32,
# enable_critic_lstm=True,
# net_arch=dict(pi=[64], vf=[64]))"
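
For context, a minimal sketch of how a hyperparameter set like the active algo_config above could be turned into an agent with sb3_contrib's RecurrentPPO. The YAML path and save_path come from this file, but the env id and the choice to eval the policy_kwargs string are assumptions about how the repo consumes the config, not code from the repo itself.

import yaml
import torch                  # commented policy_kwargs strings reference torch.nn.ReLU
from torch import nn          # the HalfCheetah-style kwargs use the bare `nn` alias
from sb3_contrib import RecurrentPPO

with open("hyperpars/rppo-asm2o.yml") as f:
    cfg = yaml.safe_load(f)

algo = dict(cfg["algo_config"])

# In the YAML, policy_kwargs is stored as a Python-expression string such as
# "dict(log_std_init=-2, ...)"; turn it into a real dict before passing it on.
if isinstance(algo.get("policy_kwargs"), str):
    algo["policy_kwargs"] = eval(algo["policy_kwargs"])

# `normalize` and `n_envs` are not RecurrentPPO constructor arguments; they would be
# handled by whatever code builds the (vectorized, optionally normalized) env.
rppo_kwargs = {k: v for k, v in algo.items() if k not in ("normalize", "n_envs")}

# "Asm2oEnv-v0" is a placeholder id; the actual fisheries env is defined elsewhere in the repo.
model = RecurrentPPO(env="Asm2oEnv-v0", verbose=1, **rppo_kwargs)
model.learn(total_timesteps=1_000_000)
model.save(f'{cfg["save_path"]}/rppo-asm2o')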
