
Commit

# This is a combination of 23 commits.
# This is the 1st commit message:

ALife 2024 version

# This is the commit message #2:

bug fix

# This is the commit message #3:

added test frequency

# This is the commit message #4:

new config files

# This is the commit message #5:

bug fix

# This is the commit message #6:

renamed directories

# This is the commit message #7:

made necessary changes for data output

# This is the commit message #8:

bug fix

# This is the commit message #9:

bug fix

# This is the commit message #10:

bug fix

# This is the commit message #11:

bug fix

# This is the commit message #12:

bug fix

# This is the commit message #13:

bug fix

# This is the commit message #14:

bug fix

# This is the commit message #15:

bug fix

# This is the commit message #16:

bug fix

# This is the commit message #17:

bug fix

# This is the commit message #18:

bug fix

# This is the commit message #19:

bug fix

# This is the commit message #20:

work around issue

# This is the commit message #21:

removed bad files

# This is the commit message #22:

bug fix

# This is the commit message #23:

fixed configs
rppc committed Apr 8, 2024
1 parent e53f999 commit 864887d
Showing 11,885 changed files with 148,527 additions and 139,538 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
41 changes: 12 additions & 29 deletions animalai/animalai/envs/alearner_ae.py
@@ -1,6 +1,5 @@
from collections import defaultdict
import numpy as np
from scipy.special import softmax
import random
from itertools import groupby

@@ -9,11 +8,10 @@ class ALearnerAE():
"""Implements the A-learning algorithm
Can change the number of rays but only responds to GOODGOALs, GOODGOALMULTI and BADGOAL"""

def __init__(self, n_actions, alpha_w=0.5, alpha_v=0.5, temperature=100):
def __init__(self, n_actions, alpha_w=0.5, alpha_v=0.5, epsilon=0.8):
self.alpha_w = alpha_w
self.alpha_v = alpha_v
self.temperature = temperature
self.initial_temperature = temperature
self.epsilon = epsilon

self.w_values = defaultdict(float)
self.sr_values = defaultdict(float)
@@ -38,19 +36,15 @@ def get_action(self, stimulus) -> int:
map(lambda k: self.sr_values[k], all_keys),
dtype=float
)
probs = softmax(all_sr_values / self.temperature)
draw = random.random()
action = 0
cum_prob = 0
for prob in probs:
cum_prob += prob
if draw <= cum_prob:
break
# this checks the edge case when there are rounding errors
if action < self.n_actions - 1:
action += 1

draw = random.random()
if draw <= self.epsilon:
max_idx = np.argmax(all_sr_values)
action = all_keys[max_idx][1]
else:
action = random.randrange(0, self.n_actions)
self.trajectory.append((self.prev_stim, action))

return action

def update_stimulus_values(self, final_stim):
@@ -80,18 +74,7 @@ def update_stimulus_values(self, final_stim):
next_stim = stim
self.trajectory = []

def decrease_temperature(self):
if self.temperature > 10:
self.temperature -= 10
else:
self.temperature = 1

def exploit(self):
self.temperature = 1

def reset_temperature(self):
self.temperature = self.initial_temperature

def print_max_stim_val(self):
max_stim_value = max(self.w_values.values())
print("Max stimulus value: %.4f" % max_stim_value)
if self.w_values:
max_stim_value = max(self.w_values.values())
print("Max stimulus value: %.4f" % max_stim_value)
136 changes: 84 additions & 52 deletions animalai/animalai/envs/alearner_e2e.py
@@ -6,11 +6,9 @@
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from scipy.special import softmax
# from scipy.special import softmax
import random
from itertools import groupby
import os


@@ -40,16 +38,25 @@ class ALearnerE2E():

def __init__(self, n_actions, in_channels,
in_width, in_height, gpu=True,
temperature=100,
discount=0.7,
discount=0.5,
future_discount=0.5,
epsilon=0.8,
future=False,
model_file=None):
self.in_channels = in_channels
self.in_width = in_width
self.in_height = in_height

self.temperature = temperature
self.initial_temperature = temperature
self.discount = discount
self.future = future

if self.future:
self.discount = 0
self.future_discount = future_discount
else:
self.discount = discount
self.future_discount = 0

self.epsilon = epsilon

self.w_values = defaultdict(float)
self.sr_values = defaultdict(float)
@@ -74,12 +81,13 @@ def __init__(self, n_actions, in_channels,
map_location=th.device('cpu')
))

self.optimiser = th.optim.Adam(self.aler.parameters(), lr=0.001,
weight_decay=1e-5)
# self.optimiser = th.optim.SGD(self.aler.parameters(), lr=0.01,
# momentum=0.9, nesterov=True)
# self.optimiser = th.optim.Adam(self.aler.parameters(), lr=0.001,
# weight_decay=1e-5)
self.optimiser = th.optim.SGD(self.aler.parameters(), lr=0.01,
momentum=0.9, nesterov=True)
# self.criterion = nn.MSELoss()
self.criterion = nn.MSELoss(reduction='none')
# self.cross_entropy = nn.BCELoss()

def reset_optimiser(self):
self.optimiser = th.optim.Adam(self.aler.parameters(), lr=0.001,
@@ -94,7 +102,7 @@ def set_target_value(self):
def get_stimulus(self, obs):
return self.aler(obs)

def get_action(self, obs, reward=None) -> int:
def get_action(self, obs, reward=None, print_probs=False) -> int:
"""Returns the action to take given the current observation"""
with th.no_grad():
stim = self.aler(obs)
@@ -116,19 +124,13 @@ def get_action(self, obs, reward=None) -> int:
map(lambda k: self.sr_values[k], all_keys),
dtype=float
)
probs = softmax(all_sr_values / self.temperature)

draw = random.random()
action = 0
cum_prob = 0
for prob in probs:
cum_prob += prob
if draw <= cum_prob:
break
# this checks the edge case when there are rounding errors
if action < self.n_actions - 1:
action += 1
# max_idx = np.argmax(all_sr_values)
# action = all_keys[max_idx][1]
if draw <= self.epsilon:
max_idx = np.argmax(all_sr_values)
action = all_keys[max_idx][1]
else:
action = random.randrange(0, self.n_actions)

return stim, action

@@ -161,56 +163,86 @@ def do_training_round(self, data):
print("\ndoing training round")
for i in range(self.n_epochs):
total_loss = 0
# total_l3 = 0
steps = 0
for (imgs, actions, w_vals, u_vals,
for (imgs, actions, next_stim, w_vals, u_vals,
weights, W_vals, U_vals) in iter(loader):
# for imgs, actions, w_vals, u_vals in iter(loader):
stimuli = self.aler(imgs)
output = self.aler(stimulus=stimuli)
w_values = output[:, [0]]
sr_values = th.gather(output, 1, (actions+1))

l1 = th.mean(
weights * self.criterion(w_values,
self.discount *
th.max(w_vals + u_vals,
W_vals + U_vals))
)

l2 = th.mean(
weights * self.criterion(sr_values,
self.discount *
th.max(w_vals + u_vals,
W_vals + U_vals))
)
loss = (l1 + l2) / 2
mask = (stimuli.detach().clone() != next_stim).any(dim=1)

# l1 = th.mean(weights[mask] *
# self.criterion(w_values[mask],
# self.discount *
# th.max(w_vals[mask] + u_vals[mask],
# W_vals[mask] + U_vals[mask])
# ))
l1 = th.mean(weights[mask] *
self.criterion(w_values[mask],
self.discount *
(w_vals[mask] + u_vals[mask])
+ self.future_discount *
(W_vals[mask] + U_vals[mask])))

# l2 = th.mean(weights *
# self.criterion(sr_values,
# self.discount *
# th.max(w_vals + u_vals,
# W_vals + U_vals)))
l2 = th.mean(weights *
self.criterion(sr_values,
self.discount *
(w_vals + u_vals)
+ self.future_discount *
(W_vals + U_vals)))

if not th.isnan(l1):
loss = l1 / self.n_actions + l2
else:
loss = l2

self.optimiser.zero_grad()
loss.backward()

# nn.utils.clip_grad_norm_(self.aler.parameters(), 0.01)
self.optimiser.step()

total_loss += loss.item()
# total_l3 += l3.item()
steps += 1
print("epoch %d | loss = %.4e" % (i+1, total_loss / steps))
# print("epoch %d | l3 = %.4e" % (i+1, total_l3 / steps))

if self.use_target_value:
aler = ALearningModel(self.in_channels,
self.in_width,
self.in_height)
if self.gpu:
aler = aler.to(0)
aler.load_state_dict(self.aler.state_dict())
dataset.update_aler(aler)
print("\n")

def decrease_temperature(self):
if self.temperature > 10:
self.temperature -= 10
else:
self.temperature = 1

def exploit(self):
self.temperature = 1
if self.n_epochs < 10:
self.n_epochs += 1

def reset_temperature(self):
self.temperature = self.initial_temperature
if self.discount < 0.5:
self.discount += 0.1
else:
self.discount = 0.5

def print_max_stim_val(self):
max_stim_value = max(self.w_values.values())
min_stim_value = min(self.w_values.values())
max_sr_value = max(self.sr_values.values())
print("Max stimulus value: %.4f" % max_stim_value)
print("Max S-R value: %.4f" % max_sr_value)
min_sr_value = min(self.sr_values.values())
print("Stimulus values: [%.4f, %.4f]" %
(min_stim_value, max_stim_value))
print("S-R values: [%.4f, %.4f]" % (min_sr_value, max_sr_value))

def save_model(self):
th.save(self.aler.state_dict(), self.model_file)
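The training round above computes its loss terms with nn.MSELoss(reduction='none') so that per-sample importance weights and a boolean mask (rows whose stimulus actually changed) can be applied before averaging. Below is a small self-contained sketch of that pattern, using made-up tensor values rather than the repository's data.

# Hedged sketch: weighted, masked regression loss via reduction='none',
# in the spirit of the l1/l2 terms in do_training_round. Values are made up.
import torch as th
import torch.nn as nn

criterion = nn.MSELoss(reduction="none")          # keep per-element losses

pred = th.tensor([[0.2], [0.9], [0.4]])           # e.g. predicted w-values
target = th.tensor([[0.0], [1.0], [1.0]])         # e.g. discounted targets
weights = th.tensor([[1.0], [0.5], [2.0]])        # per-sample weights
mask = th.tensor([True, False, True])             # e.g. "stimulus changed" rows

per_elem = criterion(pred, target)                # shape (3, 1), not yet reduced
loss = th.mean(weights[mask] * per_elem[mask])    # weighted mean over kept rows

# th.mean over an empty selection yields NaN, hence the isnan guard above:
if th.isnan(loss):
    loss = th.zeros(())
print(loss.item())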
82 changes: 47 additions & 35 deletions animalai/animalai/envs/e2e_architecture.py
@@ -6,7 +6,7 @@
KERNEL_SIZE = 3
STRIDE = 2
PADDING = 1
DROPOUT = 0.1
DROPOUT = 0.15
N_CHANNELS = 64
N_HIDDEN_FEATURES = 256
N_STIMULI = 30
@@ -69,63 +69,75 @@ def __init__(self, in_channels, in_width, in_height):
self.in_channels = in_channels
self.in_width = in_width
self.in_height = in_height

# self.visual_processor = nn.Sequential(
# nn.BatchNorm3d(self.in_channels),
# nn.ReLU(),
# nn.Conv3d(self.in_channels, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.Conv3d(N_CHANNELS, N_CHANNELS,
# KERNEL_SIZE, STRIDE, PADDING),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.AvgPool3d((1, KERNEL_SIZE, KERNEL_SIZE),
# stride=STRIDE, padding=(0, PADDING, PADDING))
# )
# self.tau = tau

self.visual_processor = nn.Sequential(
ResBlock3D(self.in_channels, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
ResBlock3D(N_CHANNELS, N_CHANNELS),
nn.BatchNorm3d(self.in_channels),
nn.ReLU(),
nn.Conv3d(self.in_channels, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.Conv3d(N_CHANNELS, N_CHANNELS,
KERNEL_SIZE, STRIDE, PADDING),
nn.BatchNorm3d(N_CHANNELS),
nn.ReLU(),
nn.AvgPool3d((1, KERNEL_SIZE, KERNEL_SIZE),
stride=STRIDE, padding=(0, PADDING, PADDING))
)

# self.visual_processor = nn.Sequential(
# ResBlock3D(self.in_channels, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# ResBlock3D(N_CHANNELS, N_CHANNELS),
# nn.BatchNorm3d(N_CHANNELS),
# nn.ReLU(),
# nn.AvgPool3d((1, KERNEL_SIZE, KERNEL_SIZE),
# stride=STRIDE, padding=(0, PADDING, PADDING))
# )

self.softmax_layer = nn.Sequential(
nn.Linear(N_HIDDEN_FEATURES, N_STIMULI),
nn.LayerNorm(N_STIMULI),
nn.LogSoftmax(dim=1)
)

# self.stimulus_output = nn.Sequential(
# nn.Linear(N_HIDDEN_FEATURES, N_STIMULI),
# nn.LayerNorm(N_STIMULI),
# nn.Tanh()
# )

# self.lhs = nn.Linear(N_STIMULI, 1)
# self.rhs = nn.Linear(N_STIMULI + N_ACTIONS, 1)
self.output = nn.Linear(N_STIMULI, 1 + N_ACTIONS)

# def decrease_tau(self):
# if self.tau > 10:
# self.tau -= 10
# else:
# self.tau = 1

def forward(self, *args, **kwds):
if len(args) == 1:
img = args[0]
encoded = self.visual_processor(img)
encoded = th.reshape(encoded, (encoded.shape[0], -1))

stimulus = F.gumbel_softmax(self.softmax_layer(encoded))
stimulus = F.gumbel_softmax(self.softmax_layer(encoded), hard=True)
# stimulus = self.stimulus_output(encoded)
return stimulus
elif len(args) == 0 and len(kwds) == 1:
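In e2e_architecture.py the forward pass now calls F.gumbel_softmax(..., hard=True), which returns exact one-hot stimulus codes while still letting gradients flow through the underlying soft sample (a straight-through estimator). A short illustrative sketch follows; the random logits are stand-ins for the model's (batch, N_STIMULI) outputs.

# Hedged sketch: hard Gumbel-softmax sampling as in the updated forward().
# The logits here are random stand-ins, not the model's real outputs.
import torch as th
import torch.nn.functional as F

logits = th.randn(4, 30, requires_grad=True)    # (batch, N_STIMULI)

stimulus = F.gumbel_softmax(logits, hard=True)  # exact one-hot rows in the forward pass

print(stimulus.sum(dim=1))      # each row sums to 1.0
print(stimulus.argmax(dim=1))   # sampled stimulus index per batch element

stimulus.sum().backward()       # gradients still reach logits (straight-through)
print(logits.grad is not None)  # True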
