Commit: update configs
wenzhangliu committed Aug 16, 2023
1 parent 9673fa0 commit 83e3715
Showing 33 changed files with 130 additions and 127 deletions.
29 changes: 15 additions & 14 deletions README.md
@@ -322,20 +322,21 @@ $ tensorboard --logdir ./logs/dqn/torch/CartPole-v0

### Atari Environment (Ongoing)

-| Task              | DQN      | C51     | PPO     |
-|-------------------|----------|---------|---------|
-| ALE/AirRaid-v5    | 7316.67  | 5450.00 | 9283.33 |
-| ALE/Alien-v5      | 2676.67  | 2413.33 | 2313.33 |
-| ALE/Amidar-v5     | 627.00   | 293.0   | 964.67  |
-| ALE/Assault-v5    | 9981.67  |         | 6265.67 |
-| ALE/Asterix-v5    | 30516.67 |         | 2900.00 |
-| ALE/Asteroids-v5  |          |         | 3430.00 |
-| ALE/Bowling-v5    | 92.00    | 56.67   | 76.00   |
-| ALE/Breakout-v5   | 415.33   | 431.0   | 371.67  |
-| ALE/Freeway-v5    | 34.00    | 33.0    | 34.0    |
-| ALE/MsPacman-v5   | 4650.00  | 4690.00 | 4120.00 |
-| ALE/Pong-v5       | 21.0     | 20.0    | 21.0    |
-| ALE/Qbert-v5      | 16350.0  | 12875.0 | 20050.0 |
+| Task              | DQN      | C51      | PPO     |
+|-------------------|----------|----------|---------|
+| ALE/AirRaid-v5    | 7316.67  | 5450.00  | 9283.33 |
+| ALE/Alien-v5      | 2676.67  | 2413.33  | 2313.33 |
+| ALE/Amidar-v5     | 627.00   | 293.0    | 964.67  |
+| ALE/Assault-v5    | 9981.67  | 9088.67  | 6265.67 |
+| ALE/Asterix-v5    | 30516.67 | 12866.67 | 2900.00 |
+| ALE/Asteroids-v5  | 1393.33  | 2180.0   | 3430.00 |
+| ALE/Atlantis-v5   |          |          |         |
+| ALE/Bowling-v5    | 92.00    | 56.67    | 76.00   |
+| ALE/Breakout-v5   | 415.33   | 431.0    | 371.67  |
+| ALE/Freeway-v5    | 34.00    | 33.0     | 34.0    |
+| ALE/MsPacman-v5   | 4650.00  | 4690.00  | 4120.00 |
+| ALE/Pong-v5       | 21.0     | 20.0     | 21.0    |
+| ALE/Qbert-v5      | 16350.0  | 12875.0  | 20050.0 |



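As a rough illustration of how a single row of the table above might be reproduced, the sketch below assumes a `get_runner(method, env, env_id, is_test)` entry point in the package; the argument names and the `"atari"` environment grouping are assumptions for illustration, not something defined by this commit.

```python
# Hypothetical reproduction sketch (assumed API): train DQN on one Atari task,
# corresponding to the DQN column of the table above.
import xuanpolicy as xp

runner = xp.get_runner(method="dqn",            # algorithm column to reproduce
                       env="atari",             # environment family (assumed name)
                       env_id="ALE/Breakout-v5",
                       is_test=False)           # False: train; True: evaluate
runner.run()
```
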
7 changes: 0 additions & 7 deletions train_c51.sh

This file was deleted.

7 changes: 0 additions & 7 deletions train_ppo.sh

This file was deleted.

14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/25m.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
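
The DCG configs in this commit (this file and the ones below) raise `n_msg_iterations` from 1 to 8. That parameter controls how many rounds of max-plus message passing run over the coordination graph when greedy joint actions are selected, and `msg_normalized: True` corresponds to re-centering each message. The NumPy sketch below is only an illustration of that mechanism on a tiny graph, not the repository's implementation; the function name, data layout, and toy numbers are assumptions made for the example.

```python
import numpy as np

def maxplus_select(utilities, payoffs, edges, n_msg_iterations=8, msg_normalized=True):
    """Greedy joint action via max-plus message passing on a coordination graph.

    utilities: list of per-agent arrays u_i(a_i)
    payoffs:   dict {(i, j): array of shape (|A_i|, |A_j|)} for each edge (i, j), i < j
    edges:     list of (i, j) pairs, i < j
    """
    # one message per directed edge, initialised to zero
    msgs = {}
    for (i, j) in edges:
        msgs[(i, j)] = np.zeros(len(utilities[j]))
        msgs[(j, i)] = np.zeros(len(utilities[i]))

    for _ in range(n_msg_iterations):
        new_msgs = {}
        for (sender, receiver) in msgs:
            # orient the pairwise payoff as (A_sender, A_receiver)
            pay = (payoffs[(sender, receiver)] if (sender, receiver) in payoffs
                   else payoffs[(receiver, sender)].T)
            # sender's local belief: own utility plus messages from all other neighbours
            belief = utilities[sender].copy()
            for (src, dst) in msgs:
                if dst == sender and src != receiver:
                    belief = belief + msgs[(src, dst)]
            msg = np.max(belief[:, None] + pay, axis=0)  # maximise over the sender's action
            if msg_normalized:                           # msg_normalized: True in the config
                msg = msg - msg.mean()
            new_msgs[(sender, receiver)] = msg
        msgs = new_msgs

    # each agent maximises its utility plus all incoming messages
    actions = []
    for i, u in enumerate(utilities):
        score = u.copy()
        for (src, dst) in msgs:
            if dst == i:
                score = score + msgs[(src, dst)]
        actions.append(int(np.argmax(score)))
    return actions

# Toy 3-agent chain with 2 actions per agent: edges connect agents 0-1 and 1-2.
utilities = [np.array([0.0, 1.0]), np.array([0.5, 0.0]), np.array([0.0, 0.2])]
payoffs = {(0, 1): np.array([[2.0, 0.0], [0.0, 1.0]]),
           (1, 2): np.array([[1.0, 0.0], [0.0, 2.0]])}
print(maxplus_select(utilities, payoffs, edges=[(0, 1), (1, 2)]))
```

With a single iteration, information only travels one edge; on the toy chain above, the middle agent's preferences reach the endpoints only after a second round, which is why more iterations can matter on larger graphs.
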
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/2m_vs_1z.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/3m.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/5m_vs_6m.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/8m.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/8m_vs_9m.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/MMM2.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
14 changes: 7 additions & 7 deletions xuanpolicy/configs/dcg/sc2/corridor.yaml
@@ -17,26 +17,26 @@ recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0

-representation_hidden_size: [32, ]
-q_hidden_size: [128, ]  # the units for each hidden layer
-hidden_utility_dim: 256  # hidden units of the utility function
-hidden_payoff_dim: 256  # hidden units of the payoff function
+representation_hidden_size: [64, ]
+q_hidden_size: [64, ]  # the units for each hidden layer
+hidden_utility_dim: 64  # hidden units of the utility function
+hidden_payoff_dim: 64  # hidden units of the payoff function
bias_net: "Basic_MLP"
-hidden_bias_dim: [256, ]  # hidden units of the bias network with global states as input
+hidden_bias_dim: [64, ]  # hidden units of the bias network with global states as input
activation: "ReLU"

low_rank_payoff: False  # low-rank approximation of payoff function
payoff_rank: 5  # the rank K in the paper
graph_type: "FULL"  # specific type of the coordination graph
-n_msg_iterations: 1  # number of iterations for message passing during belief propagation
+n_msg_iterations: 8  # number of iterations for message passing during belief propagation
msg_normalized: True  # Message normalization during greedy action selection (Kok and Vlassis, 2006)

seed: 1
parallels: 1
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
-gamma: 0.95  # discount factor
+gamma: 0.99  # discount factor
double_q: True  # use double q learning

start_greedy: 1.0
2 changes: 1 addition & 1 deletion xuanpolicy/configs/mappo/sc2/25m.yaml
@@ -27,7 +27,7 @@ activation: "ReLU"
seed: 1
parallels: 1
n_size: 128
-n_epoch: 15
+n_epoch: 10
n_minibatch: 1
learning_rate: 0.0007  # 7e-4
weight_decay: 0
6 changes: 3 additions & 3 deletions xuanpolicy/configs/mappo/sc2/5m_vs_6m.yaml
@@ -27,15 +27,15 @@ activation: "ReLU"
seed: 1
parallels: 1
n_size: 128
-n_epoch: 15
+n_epoch: 10
n_minibatch: 1
learning_rate: 0.0007  # 7e-4
weight_decay: 0

vf_coef: 1.0
ent_coef: 0.01
target_kl: 0.25
-clip_range: 0.2
+clip_range: 0.05
clip_type: 1  # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99  # discount factor

@@ -46,7 +46,7 @@ use_global_state: False  # if use global state to replace joint observations
use_grad_norm: True  # gradient normalization
max_grad_norm: 10.0
use_value_clip: True  # limit the value range
-value_clip_range: 0.2
+value_clip_range: 0.05
use_value_norm: True  # use running mean and std to normalize rewards.
use_huber_loss: True  # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
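
The MAPPO configs for 5m_vs_6m (above) and 8m_vs_9m (next) tighten both `clip_range` and `value_clip_range` from 0.2 to 0.05. For reference, the sketch below shows the standard PPO clipped surrogate and clipped value loss that these two settings bound. It is a generic NumPy illustration, not the repository's learner; in particular it uses a squared error for brevity, whereas the config above selects a Huber loss via `use_huber_loss: True`.

```python
import numpy as np

def mappo_clipped_losses(logp_new, logp_old, adv, v_new, v_old, returns,
                         clip_range=0.05, value_clip_range=0.05):
    """Generic PPO-style clipped policy and value losses (element-wise mean)."""
    ratio = np.exp(logp_new - logp_old)
    # policy term: pessimistic minimum of the unclipped and clipped surrogates
    unclipped = ratio * adv
    clipped = np.clip(ratio, 1.0 - clip_range, 1.0 + clip_range) * adv
    policy_loss = -np.mean(np.minimum(unclipped, clipped))

    # value term: the new prediction may move at most value_clip_range from the old one
    v_clipped = v_old + np.clip(v_new - v_old, -value_clip_range, value_clip_range)
    value_loss = np.mean(np.maximum((v_new - returns) ** 2,
                                    (v_clipped - returns) ** 2))
    return policy_loss, value_loss
```

A smaller `clip_range` keeps each epoch's update closer to the data-collecting policy, which is a common way to stabilise training on harder SMAC maps.
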
4 changes: 2 additions & 2 deletions xuanpolicy/configs/mappo/sc2/8m_vs_9m.yaml
@@ -35,7 +35,7 @@ weight_decay: 0
vf_coef: 1.0
ent_coef: 0.01
target_kl: 0.25
-clip_range: 0.2
+clip_range: 0.05
clip_type: 1  # Gradient clip for Mindspore: 0: ms.ops.clip_by_value; 1: ms.nn.ClipByNorm()
gamma: 0.99  # discount factor

@@ -46,7 +46,7 @@ use_global_state: False  # if use global state to replace joint observations
use_grad_norm: True  # gradient normalization
max_grad_norm: 10.0
use_value_clip: True  # limit the value range
-value_clip_range: 0.2
+value_clip_range: 0.05
use_value_norm: True  # use running mean and std to normalize rewards.
use_huber_loss: True  # True: use huber loss; False: use MSE loss.
huber_delta: 10.0
6 changes: 3 additions & 3 deletions xuanpolicy/configs/mappo/sc2/MMM2.yaml
@@ -18,7 +18,7 @@ N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
initialize: "orthogonal"
-gain: 0.01
+gain: 1.0

actor_hidden_size: []
critic_hidden_size: []
@@ -27,8 +27,8 @@ activation: "ReLU"
seed: 1
parallels: 1
n_size: 128
-n_epoch: 15
-n_minibatch: 1
+n_epoch: 5
+n_minibatch: 2
learning_rate: 0.0007  # 7e-4
weight_decay: 0

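The MMM2 MAPPO config above also moves the orthogonal-initialization `gain` from 0.01 to 1.0. In PyTorch terms this corresponds to the `gain` argument of `nn.init.orthogonal_`; the snippet below is a generic illustration of what that knob does, not the repository's initializer.

```python
import torch.nn as nn

layer = nn.Linear(64, 64)
# gain rescales the orthogonal weight matrix: gain=0.01 yields near-zero initial
# outputs (often used for policy heads), while gain=1.0 preserves the scale of
# activations flowing through the layer.
nn.init.orthogonal_(layer.weight, gain=1.0)
nn.init.zeros_(layer.bias)
```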