From 98ee6b96a8eb2e7ed4344b74c39ed47b1522e5b7 Mon Sep 17 00:00:00 2001 From: Onno Eberhard Date: Thu, 16 Mar 2023 17:28:13 +0100 Subject: [PATCH] Complete vectorization --- README.md | 16 ++++++++-------- pink/sb3.py | 45 +++++++++++++++++++++------------------------ pink/tonic.py | 14 ++++++++++---- pyproject.toml | 2 +- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 9db8c56..1a63426 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ from pink import PinkNoiseDist, PinkActionNoise # Initialize environment env = gym.make("MountainCarContinuous-v0") -action_dim = env.action_space.shape[-1] seq_len = env._max_episode_steps +action_dim = env.action_space.shape[-1] ``` #### SAC @@ -38,10 +38,10 @@ seq_len = env._max_episode_steps model = SAC("MlpPolicy", env) # Set action noise -model.actor.action_dist = PinkNoiseDist(action_dim, seq_len) +model.actor.action_dist = PinkNoiseDist(seq_len, action_dim) # Train agent -model.learn(total_timesteps=10_000) +model.learn(total_timesteps=100_000) ``` #### TD3 @@ -50,11 +50,11 @@ model.learn(total_timesteps=10_000) model = TD3("MlpPolicy", env) # Set action noise -noise_scale = 0.3*np.ones(action_dim) -model.action_noise = PinkActionNoise(noise_scale, seq_len) +noise_scale = 0.3 +model.action_noise = PinkActionNoise(noise_scale, seq_len, action_dim) # Train agent -model.learn(total_timesteps=10_000) +model.learn(total_timesteps=100_000) ``` ### Tonic: MPO @@ -73,7 +73,7 @@ model = MPO_CN() model.initialize(beta, seq_len, env.observation_space, env.action_space) # Train agent -trainer = tonic.Trainer(steps=10_000) +trainer = tonic.Trainer(steps=100_000) trainer.initialize(model, env) trainer.run() ``` @@ -92,4 +92,4 @@ If you use this code in your research, please cite our paper: } ``` -If there are any problems, or you have a question, don't hesitate to open an issue here on GitHub. +If there are any problems, or if you have a question, don't hesitate to open an issue here on GitHub. diff --git a/pink/sb3.py b/pink/sb3.py index c4a2e4b..32d3365 100644 --- a/pink/sb3.py +++ b/pink/sb3.py @@ -37,22 +37,21 @@ def __init__(self, beta, sigma, seq_len, action_dim=None, rng=None): assert (action_dim is not None) == np.isscalar(beta), \ "`action_dim` has to be specified if and only if `beta` is a scalar." - self._sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma) - if action_dim is None: - self._vectorized = False - self._beta = np.asarray(beta) - self._gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng) - for b, s in zip(self._beta, self._sigma)] + self.sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma) + + if np.isscalar(beta): + self.beta = beta + self.gen = ColoredNoiseProcess(beta=self.beta, scale=self.sigma, size=(action_dim, seq_len), rng=rng) else: - self._vectorized = True - self._beta = beta - self._gen = ColoredNoiseProcess(beta=self._beta, scale=self._sigma, size=(action_dim, seq_len), rng=rng) + self.beta = np.asarray(beta) + self.gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng) + for b, s in zip(self.beta, self.sigma)] def __call__(self) -> np.ndarray: - return self._gen.sample() if self._vectorized else np.asarray([g.sample() for g in self._gen]) + return self.gen.sample() if np.isscalar(self.beta) else np.asarray([g.sample() for g in self.gen]) def __repr__(self) -> str: - return f"ColoredActionNoise(beta={self._beta}, sigma={self._sigma})" + return f"ColoredActionNoise(beta={self.beta}, sigma={self.sigma})" class PinkActionNoise(ColoredActionNoise): @@ -108,27 +107,25 @@ class (`SquashedDiagGaussianDistribution`). assert (action_dim is not None) == np.isscalar(beta), \ "`action_dim` has to be specified if and only if `beta` is a scalar." - if action_dim is None: - super().__init__(len(beta), epsilon) - self._vectorized = False - self._beta = np.asarray(beta) - self._gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self._beta] - else: + if np.isscalar(beta): super().__init__(action_dim, epsilon) - self._vectorized = True - self._beta = beta - self._gen = ColoredNoiseProcess(beta=self._beta, size=(action_dim, seq_len), rng=rng) + self.beta = beta + self.gen = ColoredNoiseProcess(beta=self.beta, size=(action_dim, seq_len), rng=rng) + else: + super().__init__(len(beta), epsilon) + self.beta = np.asarray(beta) + self.gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self.beta] def sample(self) -> th.Tensor: - if self._vectorized: - cn_sample = th.tensor(self._gen.sample()).float() + if np.isscalar(self.beta): + cn_sample = th.tensor(self.gen.sample()).float() else: - cn_sample = th.tensor([cnp.sample() for cnp in self._gen]).float() + cn_sample = th.tensor([cnp.sample() for cnp in self.gen]).float() self.gaussian_actions = self.distribution.mean + self.distribution.stddev*cn_sample return th.tanh(self.gaussian_actions) def __repr__(self) -> str: - return f"ColoredNoiseDist(beta={self._beta})" + return f"ColoredNoiseDist(beta={self.beta})" class PinkNoiseDist(ColoredNoiseDist): diff --git a/pink/tonic.py b/pink/tonic.py index 1b361cb..79366c2 100644 --- a/pink/tonic.py +++ b/pink/tonic.py @@ -22,13 +22,19 @@ def initialize(self, beta, seq_len, observation_space, action_space, rng=None, s def set_beta(self, beta): if np.isscalar(beta): - beta = [beta] * self.action_space.shape[0] - self.cn_processes = [ - ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in beta] + self.beta = beta + self.gen = ColoredNoiseProcess( + beta=self.beta, size=(self.action_space.shape[0], self.seq_len), rng=self.rng) + else: + self.beta = np.asarray(beta) + self.gen = [ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in self.beta] def _step(self, observations): observations = th.as_tensor(observations, dtype=th.float32) - cn_sample = th.tensor([[cnp.sample() for cnp in self.cn_processes]]).float() + if np.isscalar(self.beta): + cn_sample = th.tensor(self.gen.sample()).float() + else: + cn_sample = th.tensor([[cnp.sample() for cnp in self.gen]]).float() with th.no_grad(): loc = self.model.actor(observations).loc scale = self.model.actor(observations).scale diff --git a/pyproject.toml b/pyproject.toml index d14595a..a04315e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pink-noise-rl" -version = "1.0.1" +version = "2.0.0" description = "Pink noise for exploration in reinforcement learning" authors = ["Onno Eberhard "] license = "MIT"