Skip to content

Commit

Permalink
Merge pull request #61 from beduffy/master
Browse files Browse the repository at this point in the history
Renamed args.tau to args.gae_lambda
  • Loading branch information
ikostrikov2 authored Mar 20, 2019
2 parents 8826e21 + 26a9678 commit 48d9584
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
help='learning rate (default: 0.0001)')
parser.add_argument('--gamma', type=float, default=0.99,
help='discount factor for rewards (default: 0.99)')
- parser.add_argument('--tau', type=float, default=1.00,
- help='parameter for GAE (default: 1.00)')
+ parser.add_argument('--gae-lambda', type=float, default=1.00,
+ help='lambda parameter for GAE (default: 1.00)')
parser.add_argument('--entropy-coef', type=float, default=0.01,
help='entropy term coefficient (default: 0.01)')
parser.add_argument('--value-loss-coef', type=float, default=0.5,
Expand Down
4 changes: 2 additions & 2 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ def train(rank, args, shared_model, counter, lock, optimizer=None):
advantage = R - values[i]
value_loss = value_loss + 0.5 * advantage.pow(2)

- # Generalized Advantage Estimataion
+ # Generalized Advantage Estimation
delta_t = rewards[i] + args.gamma * \
values[i + 1] - values[i]
- gae = gae * args.gamma * args.tau + delta_t
+ gae = gae * args.gamma * args.gae_lambda + delta_t

policy_loss = policy_loss - \
log_probs[i] * gae.detach() - args.entropy_coef * entropies[i]
Expand Down

0 comments on commit 48d9584

Please sign in to comment.