
Commit

# This is a combination of 23 commits.
# This is the 1st commit message:

ALife 2024 version

# This is the commit message #2:

bug fix

# This is the commit message #3:

added test frequency

# This is the commit message #4:

new config files

# This is the commit message #5:

bug fix

# This is the commit message #6:

renamed directories

# This is the commit message #7:

made necessary changes for data output

# This is the commit message #8:

bug fix

# This is the commit message #9:

bug fix

# This is the commit message #10:

bug fix

# This is the commit message #11:

bug fix

# This is the commit message #12:

bug fix

# This is the commit message #13:

bug fix

# This is the commit message #14:

bug fix

# This is the commit message #15:

bug fix

# This is the commit message #16:

bug fix

# This is the commit message #17:

bug fix

# This is the commit message #18:

bug fix

# This is the commit message #19:

bug fix

# This is the commit message #20:

work around issue

# This is the commit message #21:

removed bad files

# This is the commit message #22:

bug fix

# This is the commit message #23:

fixed configs
rppc committed Apr 8, 2024
1 parent e53f999 commit 864887d
Showing 11,885 changed files with 148,527 additions and 139,538 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
41 changes: 12 additions & 29 deletions animalai/animalai/envs/alearner_ae.py
@@ -1,6 +1,5 @@
from collections import defaultdict
import numpy as np
from scipy.special import softmax
import random
from itertools import groupby

@@ -9,11 +8,10 @@ class ALearnerAE():
"""Implements the A-learning algorithm
Can change the number of rays but only responds to GOODGOALs, GOODGOALMULTI and BADGOAL"""

def __init__(self, n_actions, alpha_w=0.5, alpha_v=0.5, temperature=100):
def __init__(self, n_actions, alpha_w=0.5, alpha_v=0.5, epsilon=0.8):
self.alpha_w = alpha_w
self.alpha_v = alpha_v
self.temperature = temperature
self.initial_temperature = temperature
self.epsilon = epsilon

self.w_values = defaultdict(float)
self.sr_values = defaultdict(float)
@@ -38,19 +36,15 @@ def get_action(self, stimulus) -> int:
map(lambda k: self.sr_values[k], all_keys),
dtype=float
)
probs = softmax(all_sr_values / self.temperature)
draw = random.random()
action = 0
cum_prob = 0
for prob in probs:
cum_prob += prob
if draw <= cum_prob:
break
# this checks the edge case when there are rounding errors
if action < self.n_actions - 1:
action += 1

draw = random.random()
if draw <= self.epsilon:
max_idx = np.argmax(all_sr_values)
action = all_keys[max_idx][1]
else:
action = random.randrange(0, self.n_actions)
self.trajectory.append((self.prev_stim, action))

return action

def update_stimulus_values(self, final_stim):
@@ -80,18 +74,7 @@ def update_stimulus_values(self, final_stim):
next_stim = stim
self.trajectory = []

def decrease_temperature(self):
if self.temperature > 10:
self.temperature -= 10
else:
self.temperature = 1

def exploit(self):
self.temperature = 1

def reset_temperature(self):
self.temperature = self.initial_temperature

def print_max_stim_val(self):
max_stim_value = max(self.w_values.values())
print("Max stimulus value: %.4f" % max_stim_value)
if self.w_values:
max_stim_value = max(self.w_values.values())
print("Max stimulus value: %.4f" % max_stim_value)
136 changes: 84 additions & 52 deletions animalai/animalai/envs/alearner_e2e.py
@@ -6,11 +6,9 @@
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from scipy.special import softmax
# from scipy.special import softmax
import random
from itertools import groupby
import os


@@ -40,16 +38,25 @@ class ALearnerE2E():

def __init__(self, n_actions, in_channels,
in_width, in_height, gpu=True,
temperature=100,
discount=0.7,
discount=0.5,
future_discount=0.5,
epsilon=0.8,
future=False,
model_file=None):
self.in_channels = in_channels
self.in_width = in_width
self.in_height = in_height

self.temperature = temperature
self.initial_temperature = temperature
self.discount = discount
self.future = future

if self.future:
self.discount = 0
self.future_discount = future_discount
else:
self.discount = discount
self.future_discount = 0

self.epsilon = epsilon

self.w_values = defaultdict(float)
self.sr_values = defaultdict(float)
@@ -74,12 +81,13 @@ def __init__(self, n_actions, in_channels,
map_location=th.device('cpu')
))

self.optimiser = th.optim.Adam(self.aler.parameters(), lr=0.001,
weight_decay=1e-5)
# self.optimiser = th.optim.SGD(self.aler.parameters(), lr=0.01,
# momentum=0.9, nesterov=True)
# self.optimiser = th.optim.Adam(self.aler.parameters(), lr=0.001,
# weight_decay=1e-5)
self.optimiser = th.optim.SGD(self.aler.parameters(), lr=0.01,
momentum=0.9, nesterov=True)
# self.criterion = nn.MSELoss()
self.criterion = nn.MSELoss(reduction='none')
# self.cross_entropy = nn.BCELoss()

def reset_optimiser(self):
self.optimiser = th.optim.Adam(self.aler.parameters(), lr=0.001,
@@ -94,7 +102,7 @@ def set_target_value(self):
def get_stimulus(self, obs):
return self.aler(obs)

def get_action(self, obs, reward=None) -> int:
def get_action(self, obs, reward=None, print_probs=False) -> int:
"""Returns the action to take given the current observation"""
with th.no_grad():
stim = self.aler(obs)
@@ -116,19 +124,13 @@ def get_action(self, obs, reward=None) -> int:
map(lambda k: self.sr_values[k], all_keys),
dtype=float
)
probs = softmax(all_sr_values / self.temperature)

draw = random.random()
action = 0
cum_prob = 0
for prob in probs:
cum_prob += prob
if draw <= cum_prob:
break
# this checks the edge case when there are rounding errors
if action < self.n_actions - 1:
action += 1
# max_idx = np.argmax(all_sr_values)
# action = all_keys[max_idx][1]
if draw <= self.epsilon:
max_idx = np.argmax(all_sr_values)
action = all_keys[max_idx][1]
else:
action = random.randrange(0, self.n_actions)

return stim, action

@@ -161,56 +163,86 @@ def do_training_round(self, data):
print("\ndoing training round")
for i in range(self.n_epochs):
total_loss = 0
# total_l3 = 0
steps = 0
for (imgs, actions, w_vals, u_vals,
for (imgs, actions, next_stim, w_vals, u_vals,
weights, W_vals, U_vals) in iter(loader):
# for imgs, actions, w_vals, u_vals in iter(loader):
stimuli = self.aler(imgs)
output = self.aler(stimulus=stimuli)
w_values = output[:, [0]]
sr_values = th.gather(output, 1, (actions+1))

l1 = th.mean(
weights * self.criterion(w_values,
self.discount *
th.max(w_vals + u_vals,
W_vals + U_vals))
)

l2 = th.mean(
weights * self.criterion(sr_values,
self.discount *
th.max(w_vals + u_vals,
W_vals + U_vals))
)
loss = (l1 + l2) / 2
mask = (stimuli.detach().clone() != next_stim).any(dim=1)

# l1 = th.mean(weights[mask] *
# self.criterion(w_values[mask],
# self.discount *
# th.max(w_vals[mask] + u_vals[mask],
# W_vals[mask] + U_vals[mask])
# ))
l1 = th.mean(weights[mask] *
self.criterion(w_values[mask],
self.discount *
(w_vals[mask] + u_vals[mask])
+ self.future_discount *
(W_vals[mask] + U_vals[mask])))

# l2 = th.mean(weights *
# self.criterion(sr_values,
# self.discount *
# th.max(w_vals + u_vals,
# W_vals + U_vals)))
l2 = th.mean(weights *
self.criterion(sr_values,
self.discount *
(w_vals + u_vals)
+ self.future_discount *
(W_vals + U_vals)))

if not th.isnan(l1):
loss = l1 / self.n_actions + l2
else:
loss = l2

self.optimiser.zero_grad()
loss.backward()

# nn.utils.clip_grad_norm_(self.aler.parameters(), 0.01)
self.optimiser.step()

total_loss += loss.item()
# total_l3 += l3.item()
steps += 1
print("epoch %d | loss = %.4e" % (i+1, total_loss / steps))
# print("epoch %d | l3 = %.4e" % (i+1, total_l3 / steps))

if self.use_target_value:
aler = ALearningModel(self.in_channels,
self.in_width,
self.in_height)
if self.gpu:
aler = aler.to(0)
aler.load_state_dict(self.aler.state_dict())
dataset.update_aler(aler)
print("\n")

def decrease_temperature(self):
if self.temperature > 10:
self.temperature -= 10
else:
self.temperature = 1

def exploit(self):
self.temperature = 1
if self.n_epochs < 10:
self.n_epochs += 1

def reset_temperature(self):
self.temperature = self.initial_temperature
if self.discount < 0.5:
self.discount += 0.1
else:
self.discount = 0.5

def print_max_stim_val(self):
max_stim_value = max(self.w_values.values())
min_stim_value = min(self.w_values.values())
max_sr_value = max(self.sr_values.values())
print("Max stimulus value: %.4f" % max_stim_value)
print("Max S-R value: %.4f" % max_sr_value)
min_sr_value = min(self.sr_values.values())
print("Stimulus values: [%.4f, %.4f]" %
(min_stim_value, max_stim_value))
print("S-R values: [%.4f, %.4f]" % (min_sr_value, max_sr_value))

def save_model(self):
th.save(self.aler.state_dict(), self.model_file)
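The training round above computes its loss terms with nn.MSELoss(reduction='none') so that per-sample importance weights and a boolean mask (rows whose stimulus actually changed) can be applied before averaging. Below is a small self-contained sketch of that pattern, using made-up tensor values rather than the repository's data.

# Hedged sketch: weighted, masked regression loss via reduction='none',
# in the spirit of the l1/l2 terms in do_training_round. Values are made up.
import torch as th
import torch.nn as nn

criterion = nn.MSELoss(reduction="none")          # keep per-element losses

pred = th.tensor([[0.2], [0.9], [0.4]])           # e.g. predicted w-values
target = th.tensor([[0.0], [1.0], [1.0]])         # e.g. discounted targets
weights = th.tensor([[1.0], [0.5], [2.0]])        # per-sample weights
mask = th.tensor([True, False, True])             # e.g. "stimulus changed" rows

per_elem = criterion(pred, target)                # shape (3, 1), not yet reduced
loss = th.mean(weights[mask] * per_elem[mask])    # weighted mean over kept rows

# th.mean over an empty selection yields NaN, hence the isnan guard above:
if th.isnan(loss):
    loss = th.zeros(())
print(loss.item())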
82 changes: 47 additions & 35 deletions animalai/animalai/envs/e2e_architecture.py
@@ -6,7 +6,7 @@
KERNEL_SIZE = 3
STRIDE = 2
PADDING = 1
DROPOUT = 0.1
DROPOUT = 0.15
N_CHANNELS = 64
N_HIDDEN_FEATURES = 256
N_STIMULI = 30
@@ -69,63 +69,75 @@ def __init__(self, in_channels, in_width, in_height):
self.in_channels = in_channels
self.in_width = in_width
self.in_height = in_height

# self.visual_processor = nn.Sequential(
# nn.BatchNorm3d(self.in_channels),
# nn.ReLU(),
# nn.Conv3d(self.in_channels, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.AvgPool3d((1, KERNEL_SIZE, KERNEL_SIZE),
# stride=STRIDE, padding=(0, PADDING, PADDING))
# )
# self.tau = tau

self.visual_processor = nn.Sequential(
ResBlock3D(self.in_channels, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
nn.BatchNorm3d(self.in_channels),
nn.ReLU(),
nn.Conv3d(self.in_channels, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.AvgPool3d((1, KERNEL_SIZE, KERNEL_SIZE),
stride=STRIDE, padding=(0, PADDING, PADDING))
)

# self.visual_processor = nn.Sequential(
# ResBlock3D(self.in_channels, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.AvgPool3d((1, KERNEL_SIZE, KERNEL_SIZE),
# stride=STRIDE, padding=(0, PADDING, PADDING))
# )

self.softmax_layer = nn.Sequential(
nn.Linear(N_HIDDEN_FEATURES, N_STIMULI),
nn.LayerNorm(N_STIMULI),
nn.LogSoftmax(dim=1)
)

# self.stimulus_output = nn.Sequential(
# nn.Linear(N_HIDDEN_FEATURES, N_STIMULI),
# nn.LayerNorm(N_STIMULI),
# nn.Tanh()
# )

# self.lhs = nn.Linear(N_STIMULI, 1)
# self.rhs = nn.Linear(N_STIMULI + N_ACTIONS, 1)
self.output = nn.Linear(N_STIMULI, 1 + N_ACTIONS)

# def decrease_tau(self):
# if self.tau > 10:
# self.tau -= 10
# else:
# self.tau = 1

def forward(self, *args, **kwds):
if len(args) == 1:
img = args[0]
encoded = self.visual_processor(img)
encoded = th.reshape(encoded, (encoded.shape[0], -1))

stimulus = F.gumbel_softmax(self.softmax_layer(encoded))
stimulus = F.gumbel_softmax(self.softmax_layer(encoded), hard=True)
# stimulus = self.stimulus_output(encoded)
return stimulus
elif len(args) == 0 and len(kwds) == 1:
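In e2e_architecture.py the forward pass now calls F.gumbel_softmax(..., hard=True), which returns exact one-hot stimulus codes while still letting gradients flow through the underlying soft sample (a straight-through estimator). A short illustrative sketch follows; the random logits are stand-ins for the model's (batch, N_STIMULI) outputs.

# Hedged sketch: hard Gumbel-softmax sampling as in the updated forward().
# The logits here are random stand-ins, not the model's real outputs.
import torch as th
import torch.nn.functional as F

logits = th.randn(4, 30, requires_grad=True)    # (batch, N_STIMULI)

stimulus = F.gumbel_softmax(logits, hard=True)  # exact one-hot rows in the forward pass

print(stimulus.sum(dim=1))      # each row sums to 1.0
print(stimulus.argmax(dim=1))   # sampled stimulus index per batch element

stimulus.sum().backward()       # gradients still reach logits (straight-through)
print(logits.grad is not None)  # True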
