Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added multiple trial functionality, other minor changes #9

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 126 additions & 7 deletions slots/slots.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self, num_bandits=3, probs=None, payouts=None, live=True,
stop_criterion : dict
Stopping criterion (str) and threshold value (float).
'''

self.num_bandits = num_bandits
self.choices = []

if not probs:
Expand Down Expand Up @@ -327,20 +327,47 @@ def best(self):
else:
return np.argmax(self.wins/(self.pulls+0.1))

def current(self):
    '''
    Return last choice of bandit.

    Returns
    -------
    int or None
        Index of the most recently chosen bandit, or None (after printing
        a notice) when no trials have been recorded yet.
    '''

    if len(self.choices) < 1:
        print('slots: No trials run so far.')
        return None
    return self.choices[-1]

def est_payouts(self, bandit=None):
    '''
    Calculate current estimate of average payout for each bandit.

    Parameters
    ----------
    bandit : int or None
        Index of a single bandit. If given, only that bandit's estimate is
        returned; if None (default), the estimates for all bandits are
        returned.

    Returns
    -------
    array of floats, float, or None
        Estimated payouts (``wins / (pulls + 0.1)``), or None (after
        printing a notice) when no trials have been recorded yet.
    '''
    if len(self.choices) < 1:
        print('slots: No trials run so far.')
        return None

    # The 0.1 offset keeps the denominator non-zero for a bandit that has
    # zero pulls.
    estimates = self.wins/(self.pulls+0.1)

    # Compare against None explicitly: bandit index 0 is falsy, so a plain
    # truthiness test would wrongly return every estimate for bandit 0.
    if bandit is None:
        return estimates
    return estimates[bandit]

def regret(self):
'''
Expand Down Expand Up @@ -430,6 +457,95 @@ def online_trial(self, bandit=None, payout=None, strategy='eps_greedy',
return {'new_trial': True,
'choice': self.run_strategy(strategy, parameters),
'best': self.best()}


# The next two methods accept whole arrays of trials at once, so the caller does not have to iterate over them manually.
def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy',
                    parameters=None):
    '''
    Record a batch of completed trials, then suggest the next trial.

    Each (bandit, payout) pair is passed to ``update`` one at a time, in
    the order given. All inputs are validated before the first call to
    ``update``, so a bad batch leaves the model untouched.

    Parameters
    ----------
    bandits : array of ints
        Bandit index for each trial
    payouts : array of floats
        Payout value for each trial (same length as ``bandits``)
    strategy : string
        Name of update strategy
    parameters : dict
        Parameters for update strategy function

    Returns
    -------
    dict
        Format: {'new_trial': boolean, 'choice': int, 'best': int}

    Raises
    ------
    ValueError
        If either array is missing, the arrays differ in length, or any
        individual bandit/payout entry is None.
    '''
    if bandits is None or payouts is None:
        raise ValueError('slots.multiple_trials: bandit and/or payout value'
                         ' missing.')

    # Lengths must be compared by value; identity comparison (`is not`)
    # only happens to work for small integers.
    if len(payouts) != len(bandits):
        raise ValueError('slots.multiple_trials: number of bandits is '
                         'different from number of payouts')

    trials = list(zip(bandits, payouts))
    if any(bandit is None or payout is None for bandit, payout in trials):
        raise ValueError('slots.multiple_trials: bandit and/or payout value'
                         ' missing.')

    for bandit, payout in trials:
        self.update(bandit=bandit, payout=payout)

    if self.crit_met():
        return {'new_trial': False, 'choice': self.best(),
                'best': self.best()}
    return {'new_trial': True,
            'choice': self.run_strategy(strategy, parameters),
            'best': self.best()}



def multiple_trials_bulk(self, bandits=None, payouts=None, strategy='eps_greedy',
                         parameters=None):
    '''
    Record a batch of completed trials in bulk, then suggest the next trial.

    Instead of calling ``update`` once per trial, the pulls and payouts
    are aggregated per bandit and the counters are incremented once for
    each distinct bandit in the batch.

    Parameters
    ----------
    bandits : array of ints
        Bandit index for each trial
    payouts : array of floats
        Payout value for each trial (same length as ``bandits``)
    strategy : string
        Name of update strategy
    parameters : dict
        Parameters for update strategy function

    Returns
    -------
    dict
        Format: {'new_trial': boolean, 'choice': int, 'best': int}

    Raises
    ------
    ValueError
        If either array is missing, the arrays differ in length, or the
        bandit indices are not integers.
    '''
    if bandits is None or payouts is None:
        raise ValueError('slots.multiple_trials_bulk: bandit and/or payout'
                         ' value missing.')

    # Lengths must be compared by value; identity comparison (`is not`)
    # only happens to work for small integers.
    if len(payouts) != len(bandits):
        raise ValueError('slots.multiple_trials_bulk: number of bandits is '
                         'different from number of payouts')

    bandit_idx = np.asarray(bandits)
    payout_vals = np.asarray(payouts)

    # Reject strings, floats, None, etc. up front so the counters are never
    # indexed with something that is not a bandit index.
    if bandit_idx.size and not np.issubdtype(bandit_idx.dtype, np.integer):
        raise ValueError('slots.multiple_trials_bulk: bandit indices must '
                         'be integers.')

    # Extend (not append) so that choices stays a flat list with one entry
    # per trial.
    self.choices.extend(bandit_idx.tolist())

    for bandit in np.unique(bandit_idx).tolist():
        selected = bandit_idx == bandit
        total = payout_vals[selected].sum()
        self.pulls[bandit] += int(selected.sum())
        self.wins[bandit] += total
        self.bandits.payouts[bandit] += total

    if self.crit_met():
        return {'new_trial': False, 'choice': self.best(),
                'best': self.best()}
    return {'new_trial': True,
            'choice': self.run_strategy(strategy, parameters),
            'best': self.best()}

def update(self, bandit, payout):
'''
Expand All @@ -451,6 +567,11 @@ def update(self, bandit, payout):
self.wins[bandit] += payout
self.bandits.payouts[bandit] += payout

def info(self):
    '''
    Summarise the current state of the model.

    Returns
    -------
    tuple
        Flat sequence of label/value pairs: the number of bandits, the
        wins, and the result of ``est_payouts()``.
    '''
    labelled = (
        ('number of bandits:', self.num_bandits),
        ('number of wins', self.wins),
        ('estimated payouts', self.est_payouts()),
    )
    # Flatten the (label, value) pairs into a single tuple.
    return tuple(item for pair in labelled for item in pair)

class Bandits():
'''
Expand Down Expand Up @@ -511,5 +632,3 @@ def pull(self, i):
else:
return 0.0

def info(self):
    '''
    Placeholder: performs no work and returns None.
    '''
    return None