V0.1.2 : Add progress bar
V0.1.2
bruzat authored Jan 16, 2021
2 parents c2eb003 + 04740b6 commit ab3e61b
Showing 10 changed files with 205 additions and 844 deletions.
3 changes: 3 additions & 0 deletions TODO.md
@@ -14,6 +14,7 @@
- [x] List Environments for start project
- [x] Add gpu option
- [x] Render on notebook/collab
- [x] Add progress bar for training

# Agents list

@@ -85,6 +86,8 @@
- [ ] Add temporal difference option in all memories
- [x] Add Discount reward in experience replay

- [ ] Add average reward

# Environments list

- [x] Gym CartPole
20 changes: 6 additions & 14 deletions blobrl/explorations/adaptative_epsilon_greedy.py
@@ -3,37 +3,29 @@

class AdaptativeEpsilonGreedy(EpsilonGreedy):

-    def __init__(self, epsilon_max, epsilon_min, step_max, step_min=0):
+    def __init__(self, epsilon_max, epsilon_min, gamma=0.9999):
""" Create AdaptativeEpsilonGreedy
        :param epsilon_max: starting value of epsilon for exploration
        :type epsilon_max: float [0.0,1.0], epsilon_max>epsilon_min
        :param epsilon_min: minimum value of epsilon for exploration
        :type epsilon_min: float [0.0,1.0], epsilon_min<epsilon_max
-        :param step_max: step where epsilon start to decrease
-        :type step_max: int
-        :param step_min: step where greedy return always False
-        :type step_min: int
+        :param gamma: decay factor applied to epsilon at each step
+        :type gamma: float [0.0,1.0]
"""
super().__init__(epsilon_max)
self.epsilon_max = epsilon_max
self.epsilon_min = epsilon_min
-        self.step_max = step_max
-        self.step_min = step_min
+        self.gamma = gamma

def be_greedy(self, step):
""" Return greedy
:param step: id of step
:type step: int
"""
-        if step <= self.step_min:
-            return False
-
-        a = (1 / (1 - (self.epsilon_min / self.epsilon_max)) - 1) * self.step_max
-        self.epsilon = max((1 - (step / (self.step_max + a))) * self.epsilon_max, self.epsilon_min)
+        self.epsilon = max(self.epsilon * self.gamma, self.epsilon_min)
return super().be_greedy(step)

def __str__(self):
-        return 'AdaptativeEpsilonGreedy-' + str(self.epsilon_max) + '-' + str(self.epsilon_min) + '-' + str(
-            self.step_max) + '-' + str(self.step_min)
+        return 'AdaptativeEpsilonGreedy-' + str(self.epsilon_max) + '-' + str(self.epsilon_min) + '-' + str(self.gamma)
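The new schedule replaces the step-based linear decay with a multiplicative one: every call to `be_greedy` shrinks epsilon by the factor `gamma` until it bottoms out at `epsilon_min`. A minimal sketch of that behaviour; `gamma=0.9999` is the new default from the diff, while the epsilon bounds are illustrative values:

```python
import math

epsilon_max, epsilon_min, gamma = 1.0, 0.05, 0.9999  # epsilon bounds chosen for illustration

# Multiplicative decay as in the new be_greedy(): epsilon <- max(epsilon * gamma, epsilon_min)
epsilon, steps_to_min = epsilon_max, 0
while epsilon > epsilon_min:
    epsilon = max(epsilon * gamma, epsilon_min)
    steps_to_min += 1

# Closed form: epsilon_max * gamma**n <= epsilon_min  =>  n >= log(epsilon_min / epsilon_max) / log(gamma)
print(steps_to_min, math.ceil(math.log(epsilon_min / epsilon_max) / math.log(gamma)))  # roughly 30000 steps
```

Unlike the old schedule, the decay no longer depends on the step index passed in, only on how many times `be_greedy` has been called.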
2 changes: 1 addition & 1 deletion blobrl/memories/experience_replay.py
@@ -58,7 +58,7 @@ def sample(self, batch_size, device):
"""
idxs = np.random.randint(len(self.buffer), size=batch_size)

-        batch = np.array([self.get_sample(idx) for idx in idxs])
+        batch = np.array([self.get_sample(idx) for idx in idxs], dtype=object)

return [torch.Tensor(list(V)).to(device=device) for V in batch.T]

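The added `dtype=object` concerns how NumPy treats ragged nested sequences: each sampled transition mixes arrays and scalars, so NumPy cannot infer a regular rectangular shape. A standalone sketch of the difference, with an assumed transition layout rather than the library's own classes:

```python
import numpy as np

# A transition mixing arrays and scalars (state, action, reward, next_state, done) -- assumed layout.
sample = [np.zeros(4), 1, 0.5, np.ones(4), False]

batch = np.array([sample, sample], dtype=object)  # (2, 5) array of Python objects
print(batch.T.shape)                              # (5, 2): one row per transition field, as sample() expects

# Without dtype=object, NumPy 1.20+ emits a VisibleDeprecationWarning about
# "ragged nested sequences", and newer releases raise a ValueError instead.
```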
9 changes: 5 additions & 4 deletions blobrl/networks/simple_network.py
@@ -6,7 +6,7 @@


class SimpleNetwork(BaseNetwork):
-    def __init__(self, observation_space, action_space):
+    def __init__(self, observation_space, action_space, linear_dim=64):
"""
:param observation_space:
@@ -15,12 +15,13 @@ def __init__(self, observation_space, action_space):
super().__init__(observation_space=observation_space, action_space=action_space)

self.network = nn.Sequential()
self.network.add_module("NetWorkSimple_Linear_Input", nn.Linear(np.prod(flatdim(self.observation_space)), 64))
self.network.add_module("NetWorkSimple_Linear_Input",
nn.Linear(np.prod(flatdim(self.observation_space)), linear_dim))
self.network.add_module("NetWorkSimple_LeakyReLU_Input", nn.LeakyReLU())
self.network.add_module("NetWorkSimple_Linear_1", nn.Linear(64, 64))
self.network.add_module("NetWorkSimple_Linear_1", nn.Linear(linear_dim, linear_dim))
self.network.add_module("NetWorkSimple_LeakyReLU_1", nn.LeakyReLU())

-        self.outputs = get_last_layers(self.action_space, last_dim=64)
+        self.outputs = get_last_layers(self.action_space, last_dim=linear_dim)

def forward(self, observation):
"""
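The hidden width, previously hard-coded to 64, is now a constructor argument `linear_dim` with the same default. A hypothetical usage sketch; the import path and the gym spaces below are assumptions based on the repository layout, not something shown in the diff:

```python
from gym import spaces

from blobrl.networks.simple_network import SimpleNetwork  # assumed import path

observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,))
action_space = spaces.Discrete(2)

# Default behaviour is unchanged (linear_dim=64); pass a larger value for wider hidden layers.
net = SimpleNetwork(observation_space, action_space, linear_dim=128)
```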
14 changes: 10 additions & 4 deletions blobrl/trainer.py
@@ -4,6 +4,7 @@

import gym
import matplotlib.pyplot as plt
+from tqdm.auto import tqdm
from IPython import display

from blobrl import Logger, Record
@@ -101,17 +102,22 @@ def evaluate(self, logger=None, render=True):
if logger:
logger.evaluate()

-    def train(self, max_episode=1000, nb_evaluation=4, render=True):
+    def train(self, max_episode=1000, nb_evaluation=4, render=True, progress_bar=True):
"""
        Start training for *max_episode* episodes.
-        :param nb_evaluation:
-        :param max_episode:
+        :param max_episode: maximum number of episodes to train the agent
+        :type max_episode: int
+        :param nb_evaluation: number of evaluation runs performed without training
+        :type nb_evaluation: int
        :param render: whether to render the environment
        :type render: bool
+        :param progress_bar: whether to show a progress bar during training
+        :type progress_bar: bool
"""

self.environment.reset()
-        for i_episode in range(1, max_episode + 1):
+        for i_episode in tqdm(range(1, max_episode + 1), disable=not progress_bar):
self.do_episode(logger=self.logger, render=render)
if nb_evaluation > 0:
if nb_evaluation <= 1:
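The bar itself comes from `tqdm.auto`, which renders a notebook widget under Jupyter/Colab and a plain terminal bar otherwise; `disable=not progress_bar` is what turns it off when `progress_bar=False` is passed. A small standalone sketch of the same pattern (the `run` function is a stand-in, not the library's API):

```python
from tqdm.auto import tqdm

def run(max_episode=1000, progress_bar=True):
    # Same pattern as the new train() loop: tqdm wraps the episode range
    # and is suppressed entirely when progress_bar is False.
    for i_episode in tqdm(range(1, max_episode + 1), disable=not progress_bar):
        pass  # one training episode would run here

run(max_episode=10)                      # shows a 10-step bar
run(max_episode=10, progress_bar=False)  # runs silently
```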