V0.1.2 : Add progress bar
V0.1.2
bruzat authored Jan 16, 2021
2 parents c2eb003 + 04740b6 commit ab3e61b
Showing 10 changed files with 205 additions and 844 deletions.
3 changes: 3 additions & 0 deletions TODO.md
@@ -14,6 +14,7 @@
- [x] List Environments for start project
- [x] Add gpu option
- [x] Render on notebook/collab
- [x] Add progress bar for training

# Agents list

@@ -85,6 +86,8 @@
- [ ] Add temporal difference option in all memories
- [x] Add Discount reward in experience replay

- [ ] Add average reward

# Environments list

- [x] Gym CartPole
20 changes: 6 additions & 14 deletions blobrl/explorations/adaptative_epsilon_greedy.py
@@ -3,37 +3,29 @@

class AdaptativeEpsilonGreedy(EpsilonGreedy):

-    def __init__(self, epsilon_max, epsilon_min, step_max, step_min=0):
+    def __init__(self, epsilon_max, epsilon_min, gamma=0.9999):
""" Create AdaptativeEpsilonGreedy
        :param epsilon_max: starting value of epsilon for exploration
        :type epsilon_max: float [0.0,1.0], epsilon_max>epsilon_min
        :param epsilon_min: minimum value of epsilon for exploration
        :type epsilon_min: float [0.0,1.0], epsilon_min<epsilon_max
-        :param step_max: step where epsilon start to decrease
-        :type step_max: int
-        :param step_min: step where greedy return always False
-        :type step_min: int
+        :param gamma: decay factor applied to epsilon at each step
+        :type gamma: float [0.0,1.0]
"""
super().__init__(epsilon_max)
self.epsilon_max = epsilon_max
self.epsilon_min = epsilon_min
-        self.step_max = step_max
-        self.step_min = step_min
+        self.gamma = gamma

def be_greedy(self, step):
""" Return greedy
:param step: id of step
:type step: int
"""
-        if step <= self.step_min:
-            return False
-
-        a = (1 / (1 - (self.epsilon_min / self.epsilon_max)) - 1) * self.step_max
-        self.epsilon = max((1 - (step / (self.step_max + a))) * self.epsilon_max, self.epsilon_min)
+        self.epsilon = max(self.epsilon * self.gamma, self.epsilon_min)
return super().be_greedy(step)

def __str__(self):
-        return 'AdaptativeEpsilonGreedy-' + str(self.epsilon_max) + '-' + str(self.epsilon_min) + '-' + str(
-            self.step_max) + '-' + str(self.step_min)
+        return 'AdaptativeEpsilonGreedy-' + str(self.epsilon_max) + '-' + str(self.epsilon_min) + '-' + str(self.gamma)
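The new schedule replaces the step-based linear decay with a multiplicative one: every call to `be_greedy` shrinks epsilon by the factor `gamma` until it bottoms out at `epsilon_min`. A minimal sketch of that behaviour; `gamma=0.9999` is the new default from the diff, while the epsilon bounds are illustrative values:

```python
import math

epsilon_max, epsilon_min, gamma = 1.0, 0.05, 0.9999  # epsilon bounds chosen for illustration

# Multiplicative decay as in the new be_greedy(): epsilon <- max(epsilon * gamma, epsilon_min)
epsilon, steps_to_min = epsilon_max, 0
while epsilon > epsilon_min:
    epsilon = max(epsilon * gamma, epsilon_min)
    steps_to_min += 1

# Closed form: epsilon_max * gamma**n <= epsilon_min  =>  n >= log(epsilon_min / epsilon_max) / log(gamma)
print(steps_to_min, math.ceil(math.log(epsilon_min / epsilon_max) / math.log(gamma)))  # roughly 30000 steps
```

Unlike the old schedule, the decay no longer depends on the step index passed in, only on how many times `be_greedy` has been called.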
2 changes: 1 addition & 1 deletion blobrl/memories/experience_replay.py
@@ -58,7 +58,7 @@ def sample(self, batch_size, device):
"""
idxs = np.random.randint(len(self.buffer), size=batch_size)

-        batch = np.array([self.get_sample(idx) for idx in idxs])
+        batch = np.array([self.get_sample(idx) for idx in idxs], dtype=object)

return [torch.Tensor(list(V)).to(device=device) for V in batch.T]

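The added `dtype=object` concerns how NumPy treats ragged nested sequences: each sampled transition mixes arrays and scalars, so NumPy cannot infer a regular rectangular shape. A standalone sketch of the difference, with an assumed transition layout rather than the library's own classes:

```python
import numpy as np

# A transition mixing arrays and scalars (state, action, reward, next_state, done) -- assumed layout.
sample = [np.zeros(4), 1, 0.5, np.ones(4), False]

batch = np.array([sample, sample], dtype=object)  # (2, 5) array of Python objects
print(batch.T.shape)                              # (5, 2): one row per transition field, as sample() expects

# Without dtype=object, NumPy 1.20+ emits a VisibleDeprecationWarning about
# "ragged nested sequences", and newer releases raise a ValueError instead.
```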
9 changes: 5 additions & 4 deletions blobrl/networks/simple_network.py
@@ -6,7 +6,7 @@


class SimpleNetwork(BaseNetwork):
-    def __init__(self, observation_space, action_space):
+    def __init__(self, observation_space, action_space, linear_dim=64):
"""
:param observation_space:
@@ -15,12 +15,13 @@ def __init__(self, observation_space, action_space):
super().__init__(observation_space=observation_space, action_space=action_space)

self.network = nn.Sequential()
self.network.add_module("NetWorkSimple_Linear_Input", nn.Linear(np.prod(flatdim(self.observation_space)), 64))
self.network.add_module("NetWorkSimple_Linear_Input",
nn.Linear(np.prod(flatdim(self.observation_space)), linear_dim))
self.network.add_module("NetWorkSimple_LeakyReLU_Input", nn.LeakyReLU())
self.network.add_module("NetWorkSimple_Linear_1", nn.Linear(64, 64))
self.network.add_module("NetWorkSimple_Linear_1", nn.Linear(linear_dim, linear_dim))
self.network.add_module("NetWorkSimple_LeakyReLU_1", nn.LeakyReLU())

-        self.outputs = get_last_layers(self.action_space, last_dim=64)
+        self.outputs = get_last_layers(self.action_space, last_dim=linear_dim)

def forward(self, observation):
"""
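The hidden width, previously hard-coded to 64, is now a constructor argument `linear_dim` with the same default. A hypothetical usage sketch; the import path and the gym spaces below are assumptions based on the repository layout, not something shown in the diff:

```python
from gym import spaces

from blobrl.networks.simple_network import SimpleNetwork  # assumed import path

observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,))
action_space = spaces.Discrete(2)

# Default behaviour is unchanged (linear_dim=64); pass a larger value for wider hidden layers.
net = SimpleNetwork(observation_space, action_space, linear_dim=128)
```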
14 changes: 10 additions & 4 deletions blobrl/trainer.py
@@ -4,6 +4,7 @@

import gym
import matplotlib.pyplot as plt
+from tqdm.auto import tqdm
from IPython import display

from blobrl import Logger, Record
@@ -101,17 +102,22 @@ def evaluate(self, logger=None, render=True):
if logger:
logger.evaluate()

-    def train(self, max_episode=1000, nb_evaluation=4, render=True):
+    def train(self, max_episode=1000, nb_evaluation=4, render=True, progress_bar=True):
"""
        Start training for *max_episode* episodes.
-        :param nb_evaluation:
-        :param max_episode:
+        :param max_episode: maximum number of episodes to train the agent
+        :type max_episode: int
+        :param nb_evaluation: number of evaluation runs performed without training
+        :type nb_evaluation: int
        :param render: whether to render the environment
        :type render: bool
+        :param progress_bar: whether to show a progress bar during training
+        :type progress_bar: bool
"""

self.environment.reset()
-        for i_episode in range(1, max_episode + 1):
+        for i_episode in tqdm(range(1, max_episode + 1), disable=not progress_bar):
self.do_episode(logger=self.logger, render=render)
if nb_evaluation > 0:
if nb_evaluation <= 1:
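The bar itself comes from `tqdm.auto`, which renders a notebook widget under Jupyter/Colab and a plain terminal bar otherwise; `disable=not progress_bar` is what turns it off when `progress_bar=False` is passed. A small standalone sketch of the same pattern (the `run` function is a stand-in, not the library's API):

```python
from tqdm.auto import tqdm

def run(max_episode=1000, progress_bar=True):
    # Same pattern as the new train() loop: tqdm wraps the episode range
    # and is suppressed entirely when progress_bar is False.
    for i_episode in tqdm(range(1, max_episode + 1), disable=not progress_bar):
        pass  # one training episode would run here

run(max_episode=10)                      # shows a 10-step bar
run(max_episode=10, progress_bar=False)  # runs silently
```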