From 2c986aa862dbac62d3ba0f52445cc0dc18866154 Mon Sep 17 00:00:00 2001
From: Kaixhin
Date: Sat, 18 May 2019 22:16:10 +0100
Subject: [PATCH] Add reacher-easy to DM Control Suite envs

---
 env.py  | 4 ++--
 main.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/env.py b/env.py
index 21fc1e0..0b38e8d 100644
--- a/env.py
+++ b/env.py
@@ -4,8 +4,8 @@
 
 
 GYM_ENVS = ['Pendulum-v0', 'MountainCarContinuous-v0', 'Ant-v2', 'HalfCheetah-v2', 'Hopper-v2', 'Humanoid-v2', 'HumanoidStandup-v2', 'InvertedDoublePendulum-v2', 'InvertedPendulum-v2', 'Reacher-v2', 'Swimmer-v2', 'Walker2d-v2']
-CONTROL_SUITE_ENVS = ['cartpole-balance', 'cartpole-swingup', 'finger-spin', 'cheetah-run', 'ball_in_cup-catch', 'walker-walk']
-CONTROL_SUITE_ACTION_REPEATS = {'cartpole': 8, 'finger': 2, 'cheetah': 4, 'ball_in_cup': 6, 'walker': 2}
+CONTROL_SUITE_ENVS = ['cartpole-balance', 'cartpole-swingup', 'reacher-easy', 'finger-spin', 'cheetah-run', 'ball_in_cup-catch', 'walker-walk']
+CONTROL_SUITE_ACTION_REPEATS = {'cartpole': 8, 'reacher': 4, 'finger': 2, 'cheetah': 4, 'ball_in_cup': 6, 'walker': 2}
 
 
 class ControlSuiteEnv():
diff --git a/main.py b/main.py
index db4fa1a..b694279 100644
--- a/main.py
+++ b/main.py
@@ -30,7 +30,7 @@
 parser.add_argument('--state-size', type=int, default=30, metavar='Z', help='State/latent size')
 parser.add_argument('--action-repeat', type=int, default=2, metavar='R', help='Action repeat')
 parser.add_argument('--action-noise', type=float, default=0.3, metavar='ε', help='Action noise')
-parser.add_argument('--episodes', type=int, default=2000, metavar='E', help='Total number of episodes')
+parser.add_argument('--episodes', type=int, default=1000, metavar='E', help='Total number of episodes')
 parser.add_argument('--seed-episodes', type=int, default=5, metavar='S', help='Seed episodes')
 parser.add_argument('--collect-interval', type=int, default=100, metavar='C', help='Collect interval')
 parser.add_argument('--batch-size', type=int, default=50, metavar='B', help='Batch size')
@@ -40,7 +40,7 @@
 parser.add_argument('--overshooting-reward-scale', type=float, default=0, metavar='R>1', help='Latent overshooting reward prediction weight for t > 1 (0 to disable)')
 parser.add_argument('--global-kl-beta', type=float, default=0, metavar='βg', help='Global KL weight (0 to disable)')
 parser.add_argument('--free-nats', type=float, default=3, metavar='F', help='Free nats')
-parser.add_argument('--learning-rate', type=float, default=1e-3, metavar='α', help='Learning rate')  # TODO: Original has a linear learning rate decay, but it seems unlikely that this makes a significant difference
+parser.add_argument('--learning-rate', type=float, default=1e-3, metavar='α', help='Learning rate')  # Note that original has a linear learning rate decay, but it seems unlikely that this makes a significant difference
 parser.add_argument('--grad-clip-norm', type=float, default=1000, metavar='C', help='Gradient clipping norm')
 parser.add_argument('--planning-horizon', type=int, default=12, metavar='H', help='Planning horizon distance')
 parser.add_argument('--optimisation-iters', type=int, default=10, metavar='I', help='Planning optimisation iterations')
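
For context on how the new entry is consumed: Control Suite env names in this repo follow a 'domain-task' pattern, and CONTROL_SUITE_ACTION_REPEATS is keyed by domain, so 'reacher-easy' loads the reacher domain's easy task with an action repeat of 4. The sketch below illustrates this against the standard dm_control suite API; it is a minimal illustration, not code from env.py, and the variable names (env_name, domain, task, action_repeat) are hypothetical.

# Sketch only: shows how a 'domain-task' entry and its per-domain action
# repeat are assumed to be used; not taken verbatim from env.py.
import numpy as np
from dm_control import suite

CONTROL_SUITE_ACTION_REPEATS = {'cartpole': 8, 'reacher': 4, 'finger': 2, 'cheetah': 4, 'ball_in_cup': 6, 'walker': 2}

env_name = 'reacher-easy'
domain, task = env_name.split('-')  # 'reacher', 'easy'
env = suite.load(domain_name=domain, task_name=task)
action_repeat = CONTROL_SUITE_ACTION_REPEATS[domain]  # 4 for reacher

# Repeat each action `action_repeat` times, accumulating the reward
spec = env.action_spec()
time_step = env.reset()
action = np.random.uniform(spec.minimum, spec.maximum, size=spec.shape)
reward = 0
for _ in range(action_repeat):
    time_step = env.step(action)
    reward += time_step.reward or 0

With the patch applied, the new task should be selectable like the existing Control Suite envs (assuming the repo's existing --env argument), e.g. python main.py --env reacher-easy --action-repeat 4.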