Commit 575072e

gfootball

wenzhangliu committed Nov 10, 2023
1 parent 3d239ca
Showing 5 changed files with 73 additions and 22 deletions.
1 change: 0 additions & 1 deletion xuance/configs/iql/football/3v1.yaml
@@ -15,7 +15,6 @@ policy: "Basic_Q_network_marl"
representation: "Basic_RNN"
vectorize: "Subproc_Football"
runner: "Football_Runner"
on_policy: False

# recurrent settings for Basic_RNN representation
use_recurrent: True
8 changes: 4 additions & 4 deletions xuance/configs/mappo/football/3v1.yaml
@@ -20,8 +20,8 @@ runner: "Football_Runner"
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [128, 128, 128]
recurrent_hidden_size: 128
fc_hidden_sizes: [64, 64, 64]
recurrent_hidden_size: 64
N_recurrent_layers: 1
dropout: 0
normalize: "LayerNorm"
@@ -34,7 +34,7 @@ activation: "ReLU"

seed: 1
parallels: 50
n_size: 50
n_size: 450
n_epoch: 15
n_minibatch: 2
learning_rate: 0.0007 # 7e-4
@@ -65,7 +65,7 @@ start_training: 1
running_steps: 25000000
training_frequency: 1

eval_interval: 250000
eval_interval: 200000
test_episode: 50
log_dir: "./logs/mappo/"
model_dir: "./models/mappo/"
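Note on the hyperparameter change above: n_size moves from 50 to 450 while parallels stays at 50. If n_size counts stored transitions per parallel environment (an assumption; this diff does not define the field), the on-policy buffer grows from 2,500 to 22,500 samples per update, reused for n_epoch epochs over n_minibatch minibatches. A back-of-envelope sketch:

# Back-of-envelope only; assumes n_size means transitions per parallel env (not stated in the diff).
parallels, n_size = 50, 450
n_epoch, n_minibatch = 15, 2

buffer_samples = parallels * n_size             # 22,500 transitions collected per update
minibatch_size = buffer_samples // n_minibatch  # 11,250 transitions per gradient step
gradient_steps = n_epoch * n_minibatch          # 30 gradient steps per update
print(buffer_samples, minibatch_size, gradient_steps)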
59 changes: 59 additions & 0 deletions xuance/configs/qmix/football/3v1.yaml
@@ -0,0 +1,59 @@
agent: "QMIX" # the learning algorithms_marl
global_state: True
# environment settings
env_name: "Football"
scenario: "academy_3_vs_1_with_keeper"
use_stacked_frames: False # Whether to use stacked_frames
num_agent: 3
num_adversary: 0
obs_type: "simple115v2" # representation used to build the observation, choices: ["simple115v2", "extracted", "pixels_gray", "pixels"]
rewards_type: "scoring,checkpoints" # comma separated list of rewards to be added
smm_width: 96 # width of super minimap
smm_height: 72 # height of super minimap
fps: 15
policy: "Mixing_Q_network"
representation: "Basic_RNN"
vectorize: "Subproc_Football"
runner: "Football_Runner"

# recurrent settings for Basic_RNN representation
use_recurrent: True
rnn: "GRU"
recurrent_layer_N: 1
fc_hidden_sizes: [128, ]
recurrent_hidden_size: 128
N_recurrent_layers: 1
dropout: 0

representation_hidden_size: [128, ]
q_hidden_size: [128, ] # the units for each hidden layer
activation: "ReLU"

hidden_dim_mixing_net: 128 # hidden units of mixing network
hidden_dim_hyper_net: 128 # hidden units of hyper network

seed: 1
parallels: 50
buffer_size: 5000
batch_size: 32
learning_rate: 0.0007
gamma: 0.99 # discount factor
double_q: True # use double q learning

start_greedy: 1.0
end_greedy: 0.05
decay_step_greedy: 50000
start_training: 1000 # start training after n episodes
running_steps: 25000000 # 25M
train_per_step: False # True: train model per step; False: train model per episode.
training_frequency: 1
sync_frequency: 200

use_grad_clip: False
grad_clip_norm: 0.5

eval_interval: 250000
test_episode: 50
log_dir: "./logs/qmix/"
model_dir: "./models/qmix/"
videos_dir: "./videos/qmix/"
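The exploration fields above (start_greedy: 1.0, end_greedy: 0.05, decay_step_greedy: 50000) describe an epsilon-greedy schedule. A minimal sketch under the assumption of a linear anneal (the linear form is an assumption here, not something the config states):

def epsilon_at(step, start=1.0, end=0.05, decay_steps=50_000):
    """Linearly anneal epsilon from `start` to `end` over `decay_steps` steps, then hold."""
    frac = min(step / decay_steps, 1.0)
    return start + frac * (end - start)

# epsilon_at(0) -> 1.0, epsilon_at(25_000) -> 0.525, epsilon_at(100_000) -> 0.05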
5 changes: 4 additions & 1 deletion xuance/environment/football/raw_env.py
@@ -21,6 +21,7 @@ def __init__(self, args):
write_full_episode_dumps = False
self.render = False
write_video = False
self.n_agents = args.num_agent

self.env = football_env.create_environment(
env_name=self.env_id,
@@ -71,7 +72,9 @@ def reset(self):
def step(self, action):
obs, reward, terminated, info = self.env.step(action)
truncated = False
return obs, reward, terminated, truncated, info
global_reward = np.sum(reward)
reward_n = np.array([global_reward] * self.n_agents)
return obs, reward_n, terminated, truncated, info

def get_frame(self):
original_obs = self.env._env._observation
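The step() change above replaces each agent's individual reward with a shared team reward: the per-agent rewards are summed and the sum is broadcast to every agent. A standalone sketch of that pattern (the wrapper class and names are hypothetical; only the sum-and-broadcast logic mirrors the diff):

import numpy as np

class SharedRewardWrapper:
    """Illustrative sketch: give every agent the summed team reward each step."""

    def __init__(self, env, n_agents):
        self.env = env
        self.n_agents = n_agents

    def step(self, action):
        obs, reward, terminated, info = self.env.step(action)
        truncated = False
        global_reward = np.sum(reward)                         # team reward for this step
        reward_n = np.array([global_reward] * self.n_agents)   # broadcast to all agents
        return obs, reward_n, terminated, truncated, info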
22 changes: 6 additions & 16 deletions xuance/torch/runners/runner_football.py
@@ -30,11 +30,10 @@ def get_battles_result(self, last_battles_info):
return win_rate

def run_episodes(self, test_mode=False):
step_info, train_info = {}, {}
episode_score, best_score = [], -np.inf
episode_score, episode_step, best_score = [], [], -np.inf

# reset the envs
obs_n, state, infos = self.envs.reset()

envs_done = self.envs.buf_done
self.env_step = 0
filled = np.zeros([self.n_envs, self.episode_length, 1], np.int32)
@@ -66,17 +65,8 @@ def run_episodes(self, test_mode=False):
self.current_step += 1
if terminated[i_env] or truncated[i_env]: # one env is terminal
episode_score.append(info[i_env]["episode_score"])
episode_step.append(info[i_env]["episode_step"])
available_actions = self.envs.get_avail_actions()
# log
if self.use_wandb:
step_info["Episode-Steps/env-%d" % i_env] = info[i_env]["episode_step"]
step_info["Train-Episode-Rewards/env-%d" % i_env] = info[i_env]["episode_score"]
else:
step_info["Train-Results/Episode-Steps"] = {"env-%d" % i_env: info[i_env]["episode_step"]}
step_info["Train-Results/Episode-Rewards"] = {
"env-%d" % i_env: info[i_env]["episode_score"]}
self.log_infos(step_info, self.current_step)

terminal_data = (next_obs_n, next_state, available_actions, filled)
if self.on_policy:
if terminated[i_env]:
@@ -109,12 +99,12 @@
self.env_step += 1
obs_n, state = deepcopy(next_obs_n), deepcopy(next_state)

if test_mode:
pass
else:
if not test_mode:
self.agents.memory.store_episodes() # store episode data
n_epoch = self.agents.n_epoch if self.on_policy else self.n_envs
train_info = self.agents.train(self.current_step, n_epoch=n_epoch) # train
train_info["Train-Results/Train-Episode-Rewards"] = np.mean(episode_score)
train_info["Train-Results/Episode-Steps"] = np.mean(episode_step)
self.log_infos(train_info, self.current_step)

mean_episode_score = np.mean(episode_score)
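The runner change above drops the per-environment, per-episode logging and instead writes one aggregate per training call: the mean episode score and mean episode length over all episodes finished in the batch. A minimal sketch of that aggregation (hypothetical helper name; the log keys and np.mean reduction are taken from the diff):

import numpy as np

def aggregate_episode_stats(episode_score, episode_step):
    """Collapse per-episode results into the two scalars logged after each train call."""
    return {
        "Train-Results/Train-Episode-Rewards": float(np.mean(episode_score)),
        "Train-Results/Episode-Steps": float(np.mean(episode_step)),
    }

# e.g. aggregate_episode_stats([1.0, 0.0, 1.0], [37, 52, 41])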
