From 98ee6b96a8eb2e7ed4344b74c39ed47b1522e5b7 Mon Sep 17 00:00:00 2001
From: Onno Eberhard <onnoeberhard@gmail.com>
Date: Thu, 16 Mar 2023 17:28:13 +0100
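Subject: [PATCH] Complete vectorization

Select the vectorized ColoredNoiseProcess code path by checking whether
`beta` is a scalar, in both pink/sb3.py and pink/tonic.py.

* pink/sb3.py: expose `beta`, `sigma` and `gen` as public attributes
  (previously `_beta`, `_sigma`, `_gen`) and replace the `_vectorized`
  flag with `np.isscalar(self.beta)` checks.
* pink/tonic.py: `set_beta` now builds one process of size
  (action_dim, seq_len) for a scalar `beta` instead of a list of
  per-dimension processes; `cn_processes` is renamed to `gen`.
* README.md: update the examples (argument order, scalar noise scale,
  100_000 training steps).
* pyproject.toml: bump the version from 1.0.1 to 2.0.0.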

---
 README.md      | 16 ++++++++--------
 pink/sb3.py    | 45 +++++++++++++++++++++------------------------
 pink/tonic.py  | 14 ++++++++++----
 pyproject.toml |  2 +-
 4 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 9db8c56..1a63426 100644
--- a/README.md
+++ b/README.md
@@ -28,8 +28,8 @@ from pink import PinkNoiseDist, PinkActionNoise
 
 # Initialize environment
 env = gym.make("MountainCarContinuous-v0")
-action_dim = env.action_space.shape[-1]
 seq_len = env._max_episode_steps
+action_dim = env.action_space.shape[-1]
 ```
 
 #### SAC
@@ -38,10 +38,10 @@ seq_len = env._max_episode_steps
 model = SAC("MlpPolicy", env)
 
 # Set action noise
-model.actor.action_dist = PinkNoiseDist(action_dim, seq_len)
+model.actor.action_dist = PinkNoiseDist(seq_len, action_dim)
 
 # Train agent
-model.learn(total_timesteps=10_000)
+model.learn(total_timesteps=100_000)
 ```
 
 #### TD3
@@ -50,11 +50,11 @@ model.learn(total_timesteps=10_000)
 model = TD3("MlpPolicy", env)
 
 # Set action noise
-noise_scale = 0.3*np.ones(action_dim)
-model.action_noise = PinkActionNoise(noise_scale, seq_len)
+noise_scale = 0.3
+model.action_noise = PinkActionNoise(noise_scale, seq_len, action_dim)
 
 # Train agent
-model.learn(total_timesteps=10_000)
+model.learn(total_timesteps=100_000)
 ```
 
 ### Tonic: MPO
@@ -73,7 +73,7 @@ model = MPO_CN()
 model.initialize(beta, seq_len, env.observation_space, env.action_space)
 
 # Train agent
-trainer = tonic.Trainer(steps=10_000)
+trainer = tonic.Trainer(steps=100_000)
 trainer.initialize(model, env)
 trainer.run()
 ```
@@ -92,4 +92,4 @@ If you use this code in your research, please cite our paper:
 }
 ```
 
-If there are any problems, or you have a question, don't hesitate to open an issue here on GitHub.
+If there are any problems, or if you have a question, don't hesitate to open an issue here on GitHub.
diff --git a/pink/sb3.py b/pink/sb3.py
index c4a2e4b..32d3365 100644
--- a/pink/sb3.py
+++ b/pink/sb3.py
@@ -37,22 +37,21 @@ def __init__(self, beta, sigma, seq_len, action_dim=None, rng=None):
         assert (action_dim is not None) == np.isscalar(beta), \
             "`action_dim` has to be specified if and only if `beta` is a scalar."
 
-        self._sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma)
-        if action_dim is None:
-            self._vectorized = False
-            self._beta = np.asarray(beta)
-            self._gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng)
-                         for b, s in zip(self._beta, self._sigma)]
+        self.sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma)
+
+        if np.isscalar(beta):
+            self.beta = beta
+            self.gen = ColoredNoiseProcess(beta=self.beta, scale=self.sigma, size=(action_dim, seq_len), rng=rng)
         else:
-            self._vectorized = True
-            self._beta = beta
-            self._gen = ColoredNoiseProcess(beta=self._beta, scale=self._sigma, size=(action_dim, seq_len), rng=rng)
+            self.beta = np.asarray(beta)
+            self.gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng)
+                        for b, s in zip(self.beta, self.sigma)]
 
     def __call__(self) -> np.ndarray:
-        return self._gen.sample() if self._vectorized else np.asarray([g.sample() for g in self._gen])
+        return self.gen.sample() if np.isscalar(self.beta) else np.asarray([g.sample() for g in self.gen])
 
     def __repr__(self) -> str:
-        return f"ColoredActionNoise(beta={self._beta}, sigma={self._sigma})"
+        return f"ColoredActionNoise(beta={self.beta}, sigma={self.sigma})"
 
 
 class PinkActionNoise(ColoredActionNoise):
@@ -108,27 +107,25 @@ class (`SquashedDiagGaussianDistribution`).
         assert (action_dim is not None) == np.isscalar(beta), \
             "`action_dim` has to be specified if and only if `beta` is a scalar."
 
-        if action_dim is None:
-            super().__init__(len(beta), epsilon)
-            self._vectorized = False
-            self._beta = np.asarray(beta)
-            self._gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self._beta]
-        else:
+        if np.isscalar(beta):
             super().__init__(action_dim, epsilon)
-            self._vectorized = True
-            self._beta = beta
-            self._gen = ColoredNoiseProcess(beta=self._beta, size=(action_dim, seq_len), rng=rng)
+            self.beta = beta
+            self.gen = ColoredNoiseProcess(beta=self.beta, size=(action_dim, seq_len), rng=rng)
+        else:
+            super().__init__(len(beta), epsilon)
+            self.beta = np.asarray(beta)
+            self.gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self.beta]
 
     def sample(self) -> th.Tensor:
-        if self._vectorized:
-            cn_sample = th.tensor(self._gen.sample()).float()
+        if np.isscalar(self.beta):
+            cn_sample = th.tensor(self.gen.sample()).float()
         else:
-            cn_sample = th.tensor([cnp.sample() for cnp in self._gen]).float()
+            cn_sample = th.tensor([cnp.sample() for cnp in self.gen]).float()
         self.gaussian_actions = self.distribution.mean + self.distribution.stddev*cn_sample
         return th.tanh(self.gaussian_actions)
 
     def __repr__(self) -> str:
-        return f"ColoredNoiseDist(beta={self._beta})"
+        return f"ColoredNoiseDist(beta={self.beta})"
 
 
 class PinkNoiseDist(ColoredNoiseDist):
diff --git a/pink/tonic.py b/pink/tonic.py
index 1b361cb..79366c2 100644
--- a/pink/tonic.py
+++ b/pink/tonic.py
@@ -22,13 +22,19 @@ def initialize(self, beta, seq_len, observation_space, action_space, rng=None, s
 
     def set_beta(self, beta):
         if np.isscalar(beta):
-            beta = [beta] * self.action_space.shape[0]
-        self.cn_processes = [
-            ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in beta]
+            self.beta = beta
+            self.gen = ColoredNoiseProcess(
+                beta=self.beta, size=(self.action_space.shape[0], self.seq_len), rng=self.rng)
+        else:
+            self.beta = np.asarray(beta)
+            self.gen = [ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in self.beta]
 
     def _step(self, observations):
         observations = th.as_tensor(observations, dtype=th.float32)
-        cn_sample = th.tensor([[cnp.sample() for cnp in self.cn_processes]]).float()
+        if np.isscalar(self.beta):
+            cn_sample = th.tensor(self.gen.sample()).float()
+        else:
+            cn_sample = th.tensor([[cnp.sample() for cnp in self.gen]]).float()
         with th.no_grad():
             loc = self.model.actor(observations).loc
             scale = self.model.actor(observations).scale
diff --git a/pyproject.toml b/pyproject.toml
index d14595a..a04315e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pink-noise-rl"
-version = "1.0.1"
+version = "2.0.0"
 description = "Pink noise for exploration in reinforcement learning"
 authors = ["Onno Eberhard <onnoeberhard@gmail.com>"]
 license = "MIT"