From 32bef7b01c2ab9bac7efa7268273ef5ebc51a801 Mon Sep 17 00:00:00 2001
From: Jonathon Shen
Date: Sat, 9 Jun 2018 12:28:00 -0700
Subject: [PATCH] Removed all references/usage of GymEnv

---
 .../examples/trpo_gym_Acrobot-v1.py     |  38 -----
 .../examples/trpo_gym_CartPole-v0.py    |  38 -----
 .../examples/trpo_gym_CartPole-v1.py    |  38 -----
 .../examples/trpo_gym_MountainCar-v0.py |  40 -----
 .../examples/trpo_gym_Pendulum-v0.py    |  38 -----
 docs/user/gym_integration.rst           |  93 ------------
 examples/cluster_gym_mujoco_demo.py     |  75 ---------
 examples/trpo_gym_cartpole.py           |  49 ------
 examples/trpo_gym_pendulum.py           |  48 ------
 examples/trpo_gym_tf_cartpole.py        |  37 -----
 rllab/envs/__init__.py                  |   1 -
 rllab/envs/gym_env.py                   | 142 ------------------
 tests/envs/test_envs.py                 |   2 -
 13 files changed, 639 deletions(-)
 delete mode 100644 contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py
 delete mode 100644 contrib/bichengcao/examples/trpo_gym_CartPole-v0.py
 delete mode 100644 contrib/bichengcao/examples/trpo_gym_CartPole-v1.py
 delete mode 100644 contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py
 delete mode 100644 contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py
 delete mode 100644 docs/user/gym_integration.rst
 delete mode 100644 examples/cluster_gym_mujoco_demo.py
 delete mode 100644 examples/trpo_gym_cartpole.py
 delete mode 100644 examples/trpo_gym_pendulum.py
 delete mode 100644 examples/trpo_gym_tf_cartpole.py
 delete mode 100644 rllab/envs/gym_env.py

diff --git a/contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py b/contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py
deleted file mode 100644
index 4aaa6617a..000000000
--- a/contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.misc import run_experiment_lite
-from rllab.policies import CategoricalMLPPolicy
-
-
-def run_task(*_):
-    env = normalize(GymEnv("Acrobot-v1"))
-
-    policy = CategoricalMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=(32, 32)
-    )
-
-    baseline = LinearFeatureBaseline(env_spec=env.spec)
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=50,
-        discount=0.99,
-        step_size=0.01,
-        plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    n_parallel=1,
-    snapshot_mode="last",
-    plot=True,
-)
diff --git a/contrib/bichengcao/examples/trpo_gym_CartPole-v0.py b/contrib/bichengcao/examples/trpo_gym_CartPole-v0.py
deleted file mode 100644
index a4cdcef96..000000000
--- a/contrib/bichengcao/examples/trpo_gym_CartPole-v0.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.misc import run_experiment_lite
-from rllab.policies import CategoricalMLPPolicy
-
-
-def run_task(*_):
-    env = normalize(GymEnv("CartPole-v0"))
-
-    policy = CategoricalMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=(32, 32)
-    )
-
-    baseline = LinearFeatureBaseline(env_spec=env.spec)
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=50,
-        discount=0.99,
-        step_size=0.01,
-        plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    n_parallel=1,
-    snapshot_mode="last",
-    plot=True,
-)
diff --git a/contrib/bichengcao/examples/trpo_gym_CartPole-v1.py b/contrib/bichengcao/examples/trpo_gym_CartPole-v1.py
deleted file mode 100644
index b29e5c074..000000000
--- a/contrib/bichengcao/examples/trpo_gym_CartPole-v1.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.misc import run_experiment_lite
-from rllab.policies import CategoricalMLPPolicy
-
-
-def run_task(*_):
-    env = normalize(GymEnv("CartPole-v1"))
-
-    policy = CategoricalMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=(32, 32)
-    )
-
-    baseline = LinearFeatureBaseline(env_spec=env.spec)
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=50,
-        discount=0.99,
-        step_size=0.01,
-        plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    n_parallel=1,
-    snapshot_mode="last",
-    plot=True,
-)
diff --git a/contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py b/contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py
deleted file mode 100644
index 1f0404a91..000000000
--- a/contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# This doesn't work. After 150 iterations still didn't learn anything.
-
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.misc import run_experiment_lite
-from rllab.policies import CategoricalMLPPolicy
-
-
-def run_task(*_):
-    env = normalize(GymEnv("MountainCar-v0"))
-
-    policy = CategoricalMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=(32, 32)
-    )
-
-    baseline = LinearFeatureBaseline(env_spec=env.spec)
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=150,
-        discount=0.99,
-        step_size=0.1,
-        plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    n_parallel=1,
-    snapshot_mode="last",
-    plot=True,
-)
diff --git a/contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py b/contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py
deleted file mode 100644
index 3ffc3b1bd..000000000
--- a/contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.misc import run_experiment_lite
-from rllab.policies import GaussianMLPPolicy
-
-
-def run_task(*_):
-    env = normalize(GymEnv("Pendulum-v0"))
-
-    policy = GaussianMLPPolicy(
-        env_spec=env.spec,
-        hidden_sizes=(32, 32)
-    )
-
-    baseline = LinearFeatureBaseline(env_spec=env.spec)
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=50,
-        discount=0.99,
-        step_size=0.01,
-        plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    n_parallel=1,
-    snapshot_mode="last",
-    plot=True,
-)
diff --git a/docs/user/gym_integration.rst b/docs/user/gym_integration.rst
deleted file mode 100644
index 7ff4fe4ec..000000000
--- a/docs/user/gym_integration.rst
+++ /dev/null
@@ -1,93 +0,0 @@
-.. _gym_integration:
-
-
-
-===========================
-Integrating with OpenAI Gym
-===========================
-
-`OpenAI Gym `_ is a recently released reinforcement learning toolkit that contains a wide
-range of environments and an online scoreboard. rllab now provides a wrapper to run algorithms in rllab on environments
-from OpenAI Gym, as well as submitting the results to the scoreboard. The example script in :code:`examples/trpo_gym_pendulum.py`
-provides a simple example of training an agent on the :code:`Pendulum-v0` environment. The content of the file is as follows:
-
-
-.. code-block:: python
-
-    from rllab.algos.trpo import TRPO
-    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
-    from rllab.envs.gym_env import GymEnv
-    from rllab.envs.normalized_env import normalize
-    from rllab.misc.instrument import run_experiment_lite
-    from rllab.policies.gaussian_mlp_policy import GaussianMLPPolicy
-
-
-    def run_task(*_):
-        env = normalize(GymEnv("Pendulum-v0"))
-
-        policy = GaussianMLPPolicy(
-            env_spec=env.spec,
-            # The neural network policy should have two hidden layers, each with 32 hidden units.
-            hidden_sizes=(8, 8)
-        )
-
-        baseline = LinearFeatureBaseline(env_spec=env.spec)
-
-        algo = TRPO(
-            env=env,
-            policy=policy,
-            baseline=baseline,
-            batch_size=4000,
-            max_path_length=env.horizon,
-            n_itr=50,
-            discount=0.99,
-            step_size=0.01,
-            # Uncomment both lines (this and the plot parameter below) to enable plotting
-            # plot=True,
-        )
-        algo.train()
-
-
-    run_experiment_lite(
-        run_task,
-        # Number of parallel workers for sampling
-        n_parallel=1,
-        # Only keep the snapshot parameters for the last iteration
-        snapshot_mode="last",
-        # Specifies the seed for the experiment. If this is not provided, a random seed
-        # will be used
-        seed=1,
-        # plot=True,
-    )
-
-
-Running the script will automatically record the episodic total reward and
-periodically record video. When the script finishes running, you will see an
-instruction of how to upload it to the online scoreboard, similar to the following
-text (you will need to first register for an account on https://gym.openai.com,
-and set the environment variable :code:`OPENAI_GYM_API_KEY` to be your API key):
-
-
-.. code-block:: bash
-
-    ***************************
-
-    Training finished! You can upload results to OpenAI Gym by running the following command:
-
-    python scripts/submit_gym.py data/local/experiment/experiment_2016_04_27_18_32_31_0001/gym_log
-
-    ***************************
-
-
-Comparison between rllab and OpenAI Gym
-=======================================
-
-Both rllab and OpenAI Gym set out to be frameworks for developing and evaluating reinforcement learning algorithms.
-
-OpenAI Gym has a wider range of supported environments, as well as an online scoreboard for sharing the training results.
-It makes no assumptions of how the agent should be implemented.
-
-rllab offers a set of built-in implementations of RL algorithms. These implementations are agnostic how the environment
-or the policy is laid out, as well as fine grained components for developing and experimenting with new reinforcement
-learning algorithms. rllab is fully compatible with OpenAI Gym. The rllab reference implementations of a wide range of
-RL algorithms enable faster experimentation and rllab provides seamless upload to Gym’s scoreboard.
diff --git a/examples/cluster_gym_mujoco_demo.py b/examples/cluster_gym_mujoco_demo.py
deleted file mode 100644
index 8a88c376e..000000000
--- a/examples/cluster_gym_mujoco_demo.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import sys
-
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.envs.gym_util.env_util import spec
-from rllab.misc import run_experiment_lite
-
-from sandbox.rocky.tf.algos import TRPO
-from sandbox.rocky.tf.envs import TfEnv
-from sandbox.rocky.tf.policies import GaussianMLPPolicy
-
-from rllab.misc import VariantGenerator, variant
-
-
-class VG(VariantGenerator):
-    @variant
-    def step_size(self):
-        return [0.01, 0.05, 0.1]
-
-    @variant
-    def seed(self):
-        return [1, 11, 21, 31, 41]
-
-
-def run_task(vv):
-
-    env = TfEnv(
-        normalize(
-            GymEnv('HalfCheetah-v1', record_video=False, record_log=False)))
-
-    policy = GaussianMLPPolicy(
-        env_spec=spec(env),
-        # The neural network policy should have two hidden layers, each with 32 hidden units.
-        hidden_sizes=(32, 32),
-        name="policy")
-
-    baseline = LinearFeatureBaseline(env_spec=spec(env))
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=100,
-        n_itr=40,
-        discount=0.99,
-        step_size=vv["step_size"],
-        # Uncomment both lines (this and the plot parameter below) to enable plotting
-        # plot=True,
-    )
-    algo.train()
-
-
-variants = VG().variants()
-
-for v in variants:
-
-    run_experiment_lite(
-        run_task,
-        exp_prefix="first_exp",
-        # Number of parallel workers for sampling
-        n_parallel=1,
-        # Only keep the snapshot parameters for the last iteration
-        snapshot_mode="last",
-        # Specifies the seed for the experiment. If this is not provided, a random seed
-        # will be used
-        seed=v["seed"],
-        # mode="local",
-        mode="ec2",
-        variant=v,
-        # plot=True,
-        # terminate_machine=False,
-    )
-    sys.exit()
diff --git a/examples/trpo_gym_cartpole.py b/examples/trpo_gym_cartpole.py
deleted file mode 100644
index 540120708..000000000
--- a/examples/trpo_gym_cartpole.py
+++ /dev/null
@@ -1,49 +0,0 @@
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.envs.gym_util.env_util import spec
-from rllab.misc import run_experiment_lite
-from rllab.policies import CategoricalMLPPolicy
-
-
-def run_task(*_):
-    # Please note that different environments with different action spaces may
-    # require different policies. For example with a Discrete action space, a
-    # CategoricalMLPPolicy works, but for a Box action space may need to use
-    # a GaussianMLPPolicy (see the trpo_gym_pendulum.py example)
-    env = normalize(GymEnv("CartPole-v0"))
-
-    policy = CategoricalMLPPolicy(
-        env_spec=spec(env),
-        # The neural network policy should have two hidden layers, each with 32 hidden units.
-        hidden_sizes=(32, 32))
-
-    baseline = LinearFeatureBaseline(env_spec=spec(env))
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=50,
-        discount=0.99,
-        step_size=0.01,
-        # Uncomment both lines (this and the plot parameter below) to enable plotting
-        # plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    # Number of parallel workers for sampling
-    n_parallel=1,
-    # Only keep the snapshot parameters for the last iteration
-    snapshot_mode="last",
-    # Specifies the seed for the experiment. If this is not provided, a random seed
-    # will be used
-    seed=1,
-    # plot=True,
-)
diff --git a/examples/trpo_gym_pendulum.py b/examples/trpo_gym_pendulum.py
deleted file mode 100644
index a37865e18..000000000
--- a/examples/trpo_gym_pendulum.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from rllab.algos import TRPO
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.envs.gym_util.env_util import spec
-from rllab.misc import run_experiment_lite
-from rllab.policies import GaussianMLPPolicy
-
-
-def run_task(*_):
-    # Please note that different environments with different action spaces may require different
-    # policies. For example with a Box action space, a GaussianMLPPolicy works, but for a Discrete
-    # action space may need to use a CategoricalMLPPolicy (see the trpo_gym_cartpole.py example)
-    env = normalize(GymEnv("Pendulum-v0"))
-
-    policy = GaussianMLPPolicy(
-        env_spec=spec(env),
-        # The neural network policy should have two hidden layers, each with 32 hidden units.
-        hidden_sizes=(32, 32))
-
-    baseline = LinearFeatureBaseline(env_spec=spec(env))
-
-    algo = TRPO(
-        env=env,
-        policy=policy,
-        baseline=baseline,
-        batch_size=4000,
-        max_path_length=env.horizon,
-        n_itr=50,
-        discount=0.99,
-        step_size=0.01,
-        # Uncomment both lines (this and the plot parameter below) to enable plotting
-        # plot=True,
-    )
-    algo.train()
-
-
-run_experiment_lite(
-    run_task,
-    # Number of parallel workers for sampling
-    n_parallel=1,
-    # Only keep the snapshot parameters for the last iteration
-    snapshot_mode="last",
-    # Specifies the seed for the experiment. If this is not provided, a random seed
-    # will be used
-    seed=1,
-    # plot=True,
-)
diff --git a/examples/trpo_gym_tf_cartpole.py b/examples/trpo_gym_tf_cartpole.py
deleted file mode 100644
index 5c00e1758..000000000
--- a/examples/trpo_gym_tf_cartpole.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from rllab.baselines import LinearFeatureBaseline
-from rllab.envs import GymEnv
-from rllab.envs import normalize
-from rllab.envs.gym_util.env_util import spec
-from rllab.misc import stub, run_experiment_lite
-
-from sandbox.rocky.tf.envs import TfEnv
-from sandbox.rocky.tf.policies import CategoricalMLPPolicy
-from sandbox.rocky.tf.algos import TRPO
-
-stub(globals())
-
-# Need to wrap in a tf environment and force_reset to true
-# see https://github.com/openai/rllab/issues/87#issuecomment-282519288
-env = TfEnv(normalize(GymEnv("CartPole-v0", force_reset=True)))
-
-policy = CategoricalMLPPolicy(
-    name="policy",
-    env_spec=spec(env),
-    # The neural network policy should have two hidden layers, each with 32 hidden units.
-    hidden_sizes=(32, 32))
-
-baseline = LinearFeatureBaseline(env_spec=spec(env))
-
-algo = TRPO(
-    env=env,
-    policy=policy,
-    baseline=baseline,
-    batch_size=4000,
-    max_path_length=200,
-    n_itr=120,
-    discount=0.99,
-    step_size=0.01,
-    # optimizer=ConjugateGradientOptimizer(hvp_approach=FiniteDifferenceHvp(base_eps=1e-5))
-)
-
-run_experiment_lite(algo.train(), n_parallel=1, snapshot_mode="last", seed=1)
diff --git a/rllab/envs/__init__.py b/rllab/envs/__init__.py
index f5551c06d..0926a79af 100644
--- a/rllab/envs/__init__.py
+++ b/rllab/envs/__init__.py
@@ -2,7 +2,6 @@
 from rllab.envs.base import EnvSpec
 from rllab.envs.base import Step
 from rllab.envs.grid_world_env import GridWorldEnv
-from rllab.envs.gym_env import GymEnv
 from rllab.envs.identification_env import IdentificationEnv
 from rllab.envs.noisy_env import NoisyObservationEnv
 from rllab.envs.noisy_env import DelayedActionEnv
diff --git a/rllab/envs/gym_env.py b/rllab/envs/gym_env.py
deleted file mode 100644
index 770a3292b..000000000
--- a/rllab/envs/gym_env.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import gym
-import gym.wrappers
-import gym.envs
-import gym.spaces
-import traceback
-import logging
-
-try:
-    from gym import logger as monitor_logger
-
-    monitor_logger.setLevel(logging.WARNING)
-except Exception as e:
-    traceback.print_exc()
-
-import os
-import os.path as osp
-from rllab.envs import Env, Step
-from rllab.core import Serializable
-from rllab.spaces import Box
-from rllab.spaces import Discrete
-from rllab.spaces import Product
-from rllab.misc import logger
-
-
-def convert_gym_space(space):
-    if isinstance(space, gym.spaces.Box):
-        return Box(low=space.low, high=space.high)
-    elif isinstance(space, gym.spaces.Discrete):
-        return Discrete(n=space.n)
-    elif isinstance(space, gym.spaces.Tuple):
-        return Product([convert_gym_space(x) for x in space.spaces])
-    else:
-        raise NotImplementedError
-
-
-class CappedCubicVideoSchedule(object):
-    # Copied from gym, since this method is frequently moved around
-    def __call__(self, count):
-        if count < 1000:
-            return int(round(count**(1. / 3)))**3 == count
-        else:
-            return count % 1000 == 0
-
-
-class FixedIntervalVideoSchedule(object):
-    def __init__(self, interval):
-        self.interval = interval
-
-    def __call__(self, count):
-        return count % self.interval == 0
-
-
-class NoVideoSchedule(object):
-    def __call__(self, count):
-        return False
-
-
-class GymEnv(Env, Serializable):
-    def __init__(self,
-                 env_name,
-                 record_video=True,
-                 video_schedule=None,
-                 log_dir=None,
-                 record_log=True,
-                 force_reset=False):
-        if log_dir is None:
-            if logger.get_snapshot_dir() is None:
-                logger.log(
-                    "Warning: skipping Gym environment monitoring since snapshot_dir not configured."
-                )
-            else:
-                log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
-        Serializable.quick_init(self, locals())
-
-        env = gym.envs.make(env_name)
-        self.env = env
-        self.env_id = env.spec.id
-
-        assert not (not record_log and record_video)
-
-        if log_dir is None or record_log is False:
-            self.monitoring = False
-        else:
-            if not record_video:
-                video_schedule = NoVideoSchedule()
-            else:
-                if video_schedule is None:
-                    video_schedule = CappedCubicVideoSchedule()
-            self.env = gym.wrappers.Monitor(
-                self.env, log_dir, video_callable=video_schedule, force=True)
-            self.monitoring = True
-
-        self._observation_space = convert_gym_space(env.observation_space)
-        logger.log("observation space: {}".format(self._observation_space))
-        self._action_space = convert_gym_space(env.action_space)
-        logger.log("action space: {}".format(self._action_space))
-        self._horizon = env.spec.tags[
-            'wrapper_config.TimeLimit.max_episode_steps']
-        self._log_dir = log_dir
-        self._force_reset = force_reset
-
-    @property
-    def observation_space(self):
-        return self._observation_space
-
-    @property
-    def action_space(self):
-        return self._action_space
-
-    @property
-    def horizon(self):
-        return self._horizon
-
-    def reset(self):
-        if self._force_reset and self.monitoring:
-            from gym.wrappers.monitoring import Monitor
-            assert isinstance(self.env, Monitor)
-            recorder = self.env.stats_recorder
-            if recorder is not None:
-                recorder.done = True
-        return self.env.reset()
-
-    def step(self, action):
-        next_obs, reward, done, info = self.env.step(action)
-        return Step(next_obs, reward, done, **info)
-
-    def render(self):
-        self.env.render()
-
-    def terminate(self):
-        if self.monitoring:
-            self.env._close()
-            if self._log_dir is not None:
-                print("""
-    ***************************
-
-    Training finished! You can upload results to OpenAI Gym by running the following command:
-
-    python scripts/submit_gym.py %s
-
-    ***************************
-                """ % self._log_dir)
diff --git a/tests/envs/test_envs.py b/tests/envs/test_envs.py
index 00edb3513..11f2d2b8c 100644
--- a/tests/envs/test_envs.py
+++ b/tests/envs/test_envs.py
@@ -34,7 +34,6 @@
 from rllab.envs import NoisyObservationEnv, DelayedActionEnv
 from rllab.envs import NormalizedEnv
 from rllab.envs import ProxyEnv
-from rllab.envs import GymEnv
 
 simple_env_classes = [
     GridWorldEnv,
@@ -68,7 +67,6 @@
 envs.append(NoisyObservationEnv(CartpoleEnv()))
 envs.append(DelayedActionEnv(CartpoleEnv()))
 envs.append(NormalizedEnv(CartpoleEnv()))
-envs.append(GymEnv('CartPole-v0'))
 
 
 @tools.params(*envs)