From 5e41c0631c6b266cd524a7fa3c012eedb9ca9a5f Mon Sep 17 00:00:00 2001 From: Benjamin Jiang Date: Tue, 10 Apr 2018 17:37:43 -0400 Subject: [PATCH 1/4] Just working on adding info --- slots/slots.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/slots/slots.py b/slots/slots.py index d5786d4..562dd47 100755 --- a/slots/slots.py +++ b/slots/slots.py @@ -512,4 +512,10 @@ def pull(self, i): return 0.0 def info(self): - pass +# Default: display number of bandits, probabilities and payouts +# (NOT YET IMPLEMENTED) + if self.live: + return(num_bandits, probs) + else: + return None +# pass From 5678d5c163b9e85fea13cb53d4a3fdd586211a02 Mon Sep 17 00:00:00 2001 From: Benjamin Jiang Date: Tue, 17 Apr 2018 14:41:09 -0400 Subject: [PATCH 2/4] Added multiple trial functionality, info --- slots/slots.py | 139 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 126 insertions(+), 13 deletions(-) diff --git a/slots/slots.py b/slots/slots.py index 562dd47..e803d12 100755 --- a/slots/slots.py +++ b/slots/slots.py @@ -39,7 +39,7 @@ def __init__(self, num_bandits=3, probs=None, payouts=None, live=True, stop_criterion : dict Stopping criterion (str) and threshold value (float). ''' - + self.num_bandits = num_bandits self.choices = [] if not probs: @@ -327,20 +327,47 @@ def best(self): else: return np.argmax(self.wins/(self.pulls+0.1)) - def est_payouts(self): + def current(self): ''' - Calculate current estimate of average payout for each bandit. + Return last choice of bandit. Returns ------- - array of floats or None + int + Index of bandit ''' if len(self.choices) < 1: print('slots: No trials run so far.') return None else: - return self.wins/(self.pulls+0.1) + return self.choices[-1] + + def est_payouts(self, bandit=None): + ''' + Calculate current estimate of average payout for each bandit. + + Parameters + ---------- + bandit : None + If a bandit is selected, return the payout for that bandit, otherwise return all payouts. + + Returns + ------- + array of floats or None + ''' + if not bandit: + if len(self.choices) < 1: + print('slots: No trials run so far.') + return None + else: + return self.wins/(self.pulls+0.1) + else: + if len(self.choices) < 1: + print('slots: No trials run so far.') + return None + else: + return (self.wins/(self.pulls+0.1))[bandit] def regret(self): ''' @@ -430,6 +457,95 @@ def online_trial(self, bandit=None, payout=None, strategy='eps_greedy', return {'new_trial': True, 'choice': self.run_strategy(strategy, parameters), 'best': self.best()} + + + # The next two are my implementations of inserting a whole array into the model so you're not manually iterating it + def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', + parameters=None): + ''' + Feeds two arrays in and based on those results returns the next trial. + This really isn't optimized, there's a much better way of doing this if we don't + care about maintaining the + + Parameters + ---------- + bandit : array of ints + Bandit index + payout : array of floats + Payout value + strategy : string + Name of update strategy + parameters : dict + Parameters for update strategy function + + Returns + ------- + dict + Format: {'new_trial': boolean, 'choice': int, 'best': int} + ''' + if len(payouts) is not len(bandits): + raise Exception('slots.online_trials: number of bandits is different from number of payouts') + else: + for x in range(0,len(payouts)): + if bandits[x] is not None and payouts[x] is not None: + self.update(bandit=bandits[x], payout=payouts[x]) + else: + raise Exception('slots.online_trial: bandit and/or payout value' + ' missing.') + + if self.crit_met(): + return {'new_trial': False, 'choice': self.best(), + 'best': self.best()} + else: + return {'new_trial': True, + 'choice': self.run_strategy(strategy, parameters), + 'best': self.best()} + + + + def multiple_trials_bulk(self, bandits=None, payouts=None, strategy='eps_greedy', + parameters=None): + ''' + Feeds two arrays in and based on those results returns the next trial. + This really isn't optimized, there's a much better way of doing this if we don't + care about maintaining the + + Parameters + ---------- + bandit : array of ints + Bandit index + payout : array of floats + Payout value + strategy : string + Name of update strategy + parameters : dict + Parameters for update strategy function + + Returns + ------- + dict + Format: {'new_trial': boolean, 'choice': int, 'best': int} + ''' + + #do I need an error here in case someone puts a string or some shit? + banditos = np.array(bandits) + if len(payouts) is not len(bandits): + raise Exception('slots.online_trials: number of bandits is different from number of payouts') + else: + self.choices.append(bandits) + for y in list(set(bandits)): + indices = np.where(banditos == y)[0] + payola = [payouts[i] for i in indices] + self.pulls[y] += len(payola) + self.wins[y] += sum(payola) + self.bandits.payouts[y] += sum(payola) + if self.crit_met(): + return {'new_trial': False, 'choice': self.best(), + 'best': self.best()} + else: + return {'new_trial': True, + 'choice': self.run_strategy(strategy, parameters), + 'best': self.best()} def update(self, bandit, payout): ''' @@ -451,6 +567,11 @@ def update(self, bandit, payout): self.wins[bandit] += payout self.bandits.payouts[bandit] += payout + def info(self): + ''' + Default: display number of bandits, wins, and estimated probabilities + ''' + return('number of bandits:',self.num_bandits, 'number of wins', self.wins, 'estimated payouts', self.est_payouts()) class Bandits(): ''' @@ -511,11 +632,3 @@ def pull(self, i): else: return 0.0 - def info(self): -# Default: display number of bandits, probabilities and payouts -# (NOT YET IMPLEMENTED) - if self.live: - return(num_bandits, probs) - else: - return None -# pass From 6b32268991bcc638f63fe94efdc3fa70ac8a5036 Mon Sep 17 00:00:00 2001 From: Benjamin Jiang Date: Fri, 20 Apr 2018 17:03:00 -0400 Subject: [PATCH 3/4] Some list/array comprehension stuff --- slots/slots.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/slots/slots.py b/slots/slots.py index e803d12..6ecfa3b 100755 --- a/slots/slots.py +++ b/slots/slots.py @@ -465,7 +465,7 @@ def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', ''' Feeds two arrays in and based on those results returns the next trial. This really isn't optimized, there's a much better way of doing this if we don't - care about maintaining the + care about maintaining the workflow. Parameters ---------- @@ -483,7 +483,9 @@ def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', dict Format: {'new_trial': boolean, 'choice': int, 'best': int} ''' - if len(payouts) is not len(bandits): + bandits = bandits.values + payouts = payouts.values + if len(payouts) != len(bandits): raise Exception('slots.online_trials: number of bandits is different from number of payouts') else: for x in range(0,len(payouts)): @@ -507,8 +509,6 @@ def multiple_trials_bulk(self, bandits=None, payouts=None, strategy='eps_greedy' parameters=None): ''' Feeds two arrays in and based on those results returns the next trial. - This really isn't optimized, there's a much better way of doing this if we don't - care about maintaining the Parameters ---------- @@ -526,13 +526,13 @@ def multiple_trials_bulk(self, bandits=None, payouts=None, strategy='eps_greedy' dict Format: {'new_trial': boolean, 'choice': int, 'best': int} ''' - - #do I need an error here in case someone puts a string or some shit? banditos = np.array(bandits) - if len(payouts) is not len(bandits): + payouts = payouts.values + bandits = bandits.values + if len(payouts) != len(bandits): raise Exception('slots.online_trials: number of bandits is different from number of payouts') else: - self.choices.append(bandits) + self.choices.extend(list(bandits)) for y in list(set(bandits)): indices = np.where(banditos == y)[0] payola = [payouts[i] for i in indices] @@ -571,7 +571,7 @@ def info(self): ''' Default: display number of bandits, wins, and estimated probabilities ''' - return('number of bandits:',self.num_bandits, 'number of wins', self.wins, 'estimated payouts', self.est_payouts()) + return('number of bandits:',self.num_bandits, 'number of wins:', self.wins, 'estimated payouts:', self.est_payouts()) class Bandits(): ''' From 86d88349a3be3d4084e79ae23750c5136b06d925 Mon Sep 17 00:00:00 2001 From: Benjamin Jiang Date: Sun, 22 Apr 2018 15:16:59 -0400 Subject: [PATCH 4/4] Multiple Trials condense, track all payout values So I combined my two multiple trial methods into a 'hard' and a 'lazy' option, and I'm also starting to track each payout value. This definitely increases the size of the object, so maybe add an option to disable that, but it's important to the next step which is to add a time-sliding scale. --- slots/slots.py | 79 +++++++++++++++++--------------------------------- 1 file changed, 26 insertions(+), 53 deletions(-) diff --git a/slots/slots.py b/slots/slots.py index 6ecfa3b..9d9930d 100755 --- a/slots/slots.py +++ b/slots/slots.py @@ -41,7 +41,7 @@ def __init__(self, num_bandits=3, probs=None, payouts=None, live=True, ''' self.num_bandits = num_bandits self.choices = [] - + self.payout_values = [] if not probs: if not payouts: if live: @@ -135,6 +135,7 @@ def _run(self, strategy, parameters=None): choice = self.run_strategy(strategy, parameters) self.choices.append(choice) payout = self.bandits.pull(choice) + self.payout_values.append(payout) if payout is None: print('Trials exhausted. No more values for bandit', choice) return None @@ -459,8 +460,7 @@ def online_trial(self, bandit=None, payout=None, strategy='eps_greedy', 'best': self.best()} - # The next two are my implementations of inserting a whole array into the model so you're not manually iterating it - def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', + def multiple_trials(self, bandits=None, payouts=None, method = 'hard', strategy='eps_greedy', parameters=None): ''' Feeds two arrays in and based on those results returns the next trial. @@ -473,6 +473,10 @@ def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', Bandit index payout : array of floats Payout value + method : string + Name of summing strategy + If 'hard' then it manually iterates over each row + If 'lazy' it attempts to sum it as an array and only add final product strategy : string Name of update strategy parameters : dict @@ -488,12 +492,24 @@ def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', if len(payouts) != len(bandits): raise Exception('slots.online_trials: number of bandits is different from number of payouts') else: - for x in range(0,len(payouts)): - if bandits[x] is not None and payouts[x] is not None: - self.update(bandit=bandits[x], payout=payouts[x]) - else: - raise Exception('slots.online_trial: bandit and/or payout value' - ' missing.') + if method == 'hard': + for x in range(0,len(payouts)): + if bandits[x] is not None and payouts[x] is not None: + self.update(bandit=bandits[x], payout=payouts[x]) + else: + raise Exception('slots.online_trial: bandit and/or payout value' + ' missing.') + + else if method = 'lazy': + banditos = np.array(bandits) + self.choices.extend(list(bandits)) + self.payout_values.extend(list(payouts)) + for y in list(set(bandits)): + indices = np.where(banditos == y)[0] + payola = [payouts[i] for i in indices] + self.pulls[y] += len(payola) + self.wins[y] += sum(payola) + self.bandits.payouts[y] += sum(payola) if self.crit_met(): return {'new_trial': False, 'choice': self.best(), @@ -504,49 +520,6 @@ def multiple_trials(self, bandits=None, payouts=None, strategy='eps_greedy', 'best': self.best()} - - def multiple_trials_bulk(self, bandits=None, payouts=None, strategy='eps_greedy', - parameters=None): - ''' - Feeds two arrays in and based on those results returns the next trial. - - Parameters - ---------- - bandit : array of ints - Bandit index - payout : array of floats - Payout value - strategy : string - Name of update strategy - parameters : dict - Parameters for update strategy function - - Returns - ------- - dict - Format: {'new_trial': boolean, 'choice': int, 'best': int} - ''' - banditos = np.array(bandits) - payouts = payouts.values - bandits = bandits.values - if len(payouts) != len(bandits): - raise Exception('slots.online_trials: number of bandits is different from number of payouts') - else: - self.choices.extend(list(bandits)) - for y in list(set(bandits)): - indices = np.where(banditos == y)[0] - payola = [payouts[i] for i in indices] - self.pulls[y] += len(payola) - self.wins[y] += sum(payola) - self.bandits.payouts[y] += sum(payola) - if self.crit_met(): - return {'new_trial': False, 'choice': self.best(), - 'best': self.best()} - else: - return {'new_trial': True, - 'choice': self.run_strategy(strategy, parameters), - 'best': self.best()} - def update(self, bandit, payout): ''' Update bandit trials and payouts for given bandit. @@ -561,7 +534,7 @@ def update(self, bandit, payout): ------- None ''' - + self.payout_values.append(payout) self.choices.append(bandit) self.pulls[bandit] += 1 self.wins[bandit] += payout