Skip to content

Commit

Permalink
Complete vectorization
Browse files (browse the repository at this point in the history)
  • Loading branch information
onnoeberhard committed Mar 16, 2023
1 parent c1db4be commit 98ee6b9
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 37 deletions.
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ from pink import PinkNoiseDist, PinkActionNoise

# Initialize environment
env = gym.make("MountainCarContinuous-v0")
action_dim = env.action_space.shape[-1]
seq_len = env._max_episode_steps
action_dim = env.action_space.shape[-1]
```

#### SAC
Expand All @@ -38,10 +38,10 @@ seq_len = env._max_episode_steps
model = SAC("MlpPolicy", env)

# Set action noise
model.actor.action_dist = PinkNoiseDist(action_dim, seq_len)
model.actor.action_dist = PinkNoiseDist(seq_len, action_dim)

# Train agent
model.learn(total_timesteps=10_000)
model.learn(total_timesteps=100_000)
```

#### TD3
Expand All @@ -50,11 +50,11 @@ model.learn(total_timesteps=10_000)
model = TD3("MlpPolicy", env)

# Set action noise
noise_scale = 0.3*np.ones(action_dim)
model.action_noise = PinkActionNoise(noise_scale, seq_len)
noise_scale = 0.3
model.action_noise = PinkActionNoise(noise_scale, seq_len, action_dim)

# Train agent
model.learn(total_timesteps=10_000)
model.learn(total_timesteps=100_000)
```

### Tonic: MPO
Expand All @@ -73,7 +73,7 @@ model = MPO_CN()
model.initialize(beta, seq_len, env.observation_space, env.action_space)

# Train agent
trainer = tonic.Trainer(steps=10_000)
trainer = tonic.Trainer(steps=100_000)
trainer.initialize(model, env)
trainer.run()
```
Expand All @@ -92,4 +92,4 @@ If you use this code in your research, please cite our paper:
}
```

If there are any problems, or you have a question, don't hesitate to open an issue here on GitHub.
If there are any problems, or if you have a question, don't hesitate to open an issue here on GitHub.
45 changes: 21 additions & 24 deletions pink/sb3.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,21 @@ def __init__(self, beta, sigma, seq_len, action_dim=None, rng=None):
assert (action_dim is not None) == np.isscalar(beta), \
"`action_dim` has to be specified if and only if `beta` is a scalar."

self._sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma)
if action_dim is None:
self._vectorized = False
self._beta = np.asarray(beta)
self._gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng)
for b, s in zip(self._beta, self._sigma)]
self.sigma = np.full(action_dim or len(beta), sigma) if np.isscalar(sigma) else np.asarray(sigma)

if np.isscalar(beta):
self.beta = beta
self.gen = ColoredNoiseProcess(beta=self.beta, scale=self.sigma, size=(action_dim, seq_len), rng=rng)
else:
self._vectorized = True
self._beta = beta
self._gen = ColoredNoiseProcess(beta=self._beta, scale=self._sigma, size=(action_dim, seq_len), rng=rng)
self.beta = np.asarray(beta)
self.gen = [ColoredNoiseProcess(beta=b, scale=s, size=seq_len, rng=rng)
for b, s in zip(self.beta, self.sigma)]

def __call__(self) -> np.ndarray:
return self._gen.sample() if self._vectorized else np.asarray([g.sample() for g in self._gen])
return self.gen.sample() if np.isscalar(self.beta) else np.asarray([g.sample() for g in self.gen])

def __repr__(self) -> str:
return f"ColoredActionNoise(beta={self._beta}, sigma={self._sigma})"
return f"ColoredActionNoise(beta={self.beta}, sigma={self.sigma})"


class PinkActionNoise(ColoredActionNoise):
Expand Down Expand Up @@ -108,27 +107,25 @@ class (`SquashedDiagGaussianDistribution`).
assert (action_dim is not None) == np.isscalar(beta), \
"`action_dim` has to be specified if and only if `beta` is a scalar."

if action_dim is None:
super().__init__(len(beta), epsilon)
self._vectorized = False
self._beta = np.asarray(beta)
self._gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self._beta]
else:
if np.isscalar(beta):
super().__init__(action_dim, epsilon)
self._vectorized = True
self._beta = beta
self._gen = ColoredNoiseProcess(beta=self._beta, size=(action_dim, seq_len), rng=rng)
self.beta = beta
self.gen = ColoredNoiseProcess(beta=self.beta, size=(action_dim, seq_len), rng=rng)
else:
super().__init__(len(beta), epsilon)
self.beta = np.asarray(beta)
self.gen = [ColoredNoiseProcess(beta=b, size=seq_len, rng=rng) for b in self.beta]

def sample(self) -> th.Tensor:
if self._vectorized:
cn_sample = th.tensor(self._gen.sample()).float()
if np.isscalar(self.beta):
cn_sample = th.tensor(self.gen.sample()).float()
else:
cn_sample = th.tensor([cnp.sample() for cnp in self._gen]).float()
cn_sample = th.tensor([cnp.sample() for cnp in self.gen]).float()
self.gaussian_actions = self.distribution.mean + self.distribution.stddev*cn_sample
return th.tanh(self.gaussian_actions)

def __repr__(self) -> str:
return f"ColoredNoiseDist(beta={self._beta})"
return f"ColoredNoiseDist(beta={self.beta})"


class PinkNoiseDist(ColoredNoiseDist):
Expand Down
14 changes: 10 additions & 4 deletions pink/tonic.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,19 @@ def initialize(self, beta, seq_len, observation_space, action_space, rng=None, s

def set_beta(self, beta):
if np.isscalar(beta):
beta = [beta] * self.action_space.shape[0]
self.cn_processes = [
ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in beta]
self.beta = beta
self.gen = ColoredNoiseProcess(
beta=self.beta, size=(self.action_space.shape[0], self.seq_len), rng=self.rng)
else:
self.beta = np.asarray(beta)
self.gen = [ColoredNoiseProcess(beta=b, size=self.seq_len, rng=self.rng) for b in self.beta]

def _step(self, observations):
observations = th.as_tensor(observations, dtype=th.float32)
cn_sample = th.tensor([[cnp.sample() for cnp in self.cn_processes]]).float()
if np.isscalar(self.beta):
cn_sample = th.tensor(self.gen.sample()).float()
else:
cn_sample = th.tensor([[cnp.sample() for cnp in self.gen]]).float()
with th.no_grad():
loc = self.model.actor(observations).loc
scale = self.model.actor(observations).scale
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pink-noise-rl"
version = "1.0.1"
version = "2.0.0"
description = "Pink noise for exploration in reinforcement learning"
authors = ["Onno Eberhard <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 98ee6b9

Please sign in to comment.