#55 Added 2 new states (cash, holdings), removed mrdr, included always buy benchmark, and new reward definition
AmoghM · Dec 5, 2020 · 34d0626 · 34d0626
1 parent bf641c0
commit 34d0626
Show file tree

Hide file tree

Showing 4 changed files with 592 additions and 251 deletions.
diff --git a/src/data_process.py b/src/data_process.py
@@ -141,43 +141,6 @@ def get_adj_close_sma_ratio(values, window):
     ratio = values/rm
     return ratio.apply(lambda x: round(x, 5))
 
-def get_mrdr(values,baseline,test=True):
-    '''
-    Returns the market relative daily return over the window:
-    INPUTS:
-    values(pandas series)
-    window(int): time period to consider 
-    test(bool): whether this is testing period (and only the past few days' data is needed) - this flag will speed up testing
-    OUTPUTS:
-    market relative daily return(series)
-    '''
-    if test:
-        valnew = values.iloc[-3:]
-        mx = valnew.index.max()
-        gspc_temp = baseline[baseline.index <= mx].iloc[-3:]
-
-    else:
-        valnew = values
-        mx = valnew.index.max()
-        gspc_temp = baseline[baseline.index <= mx]
-
-    gspc = gspc_temp.reindex(valnew.index).fillna(method='ffill')
-
-    checkhist(gspc,valnew)
-
-    if not gspc.index.equals(valnew.index):
-        raise ValueError('Stock indecies do not match')
-
-    gspc_rets = returns(gspc)
-    stock_rets = returns(valnew)
-
-    mrdr = stock_rets / gspc_rets
-
-    if not gspc.index.equals(valnew.index) and values.index[-1] == mrdr.index[-1]:
-        raise ValueError('Stock indecies do not match')
-
-    return mrdr
-
 
 def discretize(values, num_states=4):
     '''
@@ -198,6 +161,26 @@ def discretize(values, num_states=4):
     states_value[num_states] = float('inf')
     return states_value
 
+def create_cash_and_holdings_quantiles():
+    '''
+    Returns the fixed threshold tables used for the cash and holdings states.
+    As in discretize(), the last key of each table maps to float('inf').
+    OUTPUTS:
+    cash_states_values(dict): keys 0-8 -> 20000, 40000, ..., 180000; key 9 -> inf
+    shares_states_values(dict): keys 0-8 -> 28, 56, ..., 252; key 9 -> inf
+    '''
+
+    # CASH (State 3): nine multiples of 180000/9
+    cash_step = int(180000/9)
+    cash_states_values = {i: cash_step*(i+1) for i in range(9)}
+    cash_states_values[9] = float("inf")
+
+    # HOLDINGS = Num Shares (State 4): nine multiples of 252/9
+    shares_step = int(252/9)
+    shares_states_values = {i: shares_step*(i+1) for i in range(9)}
+    shares_states_values[9] = float("inf")
+
+    return cash_states_values, shares_states_values
 
 def value_to_state(value, states_value):
     '''
@@ -232,15 +215,12 @@ def create_df(df, window=45):
     close_sma_ratio = get_adj_close_sma_ratio(df['Adj Close'], window)
     # get the upper and lower BB values
     upper, lower = get_upper_lower_bands(df['Adj Close'], window)
-    # get mrdr
     baseline = read_stock('^GSPC','2007-01-01','2016-12-31')
-    mrdr = get_mrdr(df['Adj Close'],baseline,test=False)
 
     # create bb measure, close-sma-ratio columns
     df['close_sma_ratio'] = close_sma_ratio
     df['upper_bb'] = upper
     df['lower_bb'] = lower
-    df['mrdr'] = mrdr
 
     # drop missing values
     df.dropna(inplace=True)
@@ -276,13 +256,11 @@ def get_states(df):
     }
 
     close_sma_ratio_states_value = discretize(df['norm_close_sma_ratio'])
-
-    mrdr_value = discretize(df['mrdr'])
 
-    return percent_b_states_values, close_sma_ratio_states_value, mrdr_value
+    return percent_b_states_values, close_sma_ratio_states_value
 
 
-def create_state_df(df, percent_b_states_values, close_sma_ratio_states_value,mrdr_value):
+def create_state_df(df, bb_states_value, close_sma_ratio_states_value):
     '''
     Add a new column to hold the state information to the dataframe
     Inputs:
@@ -293,20 +271,17 @@ def create_state_df(df, percent_b_states_values, close_sma_ratio_states_value,mr
     Output:
     df(dataframe)
     '''
-    df['percent_b_state'] = df['percent_b'].apply(
-        lambda x: value_to_state(x, percent_b_states_values))
-    df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(
-        lambda x: value_to_state(x, close_sma_ratio_states_value))
-    df['mrdr_state'] = df['mrdr'].apply(
-        lambda x: value_to_state(x, mrdr_value))
-
-    df['state'] = df['norm_close_sma_ratio_state'] + df['percent_b_state'] + df['mrdr_state']
-
+    #df['norm_bb_width_state'] = df['norm_bb_width'].apply(lambda x : value_to_state(x, bb_states_value)) #2 
+    df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(lambda x : value_to_state(x, close_sma_ratio_states_value))
+    df['percent_b_state'] = df['percent_b'].apply(lambda x : value_to_state(x, percent_b_states_values))
+    #df['norm_adj_close_state'] = df['norm_adj_close'].apply(lambda x : value_to_state(x, price_states_value))
+
+    #df['state'] = df['norm_close_sma_ratio_state'] + df['norm_bb_width_state']
+    df['state'] = df['norm_close_sma_ratio_state'] + df['percent_b_state']
     df.dropna(inplace=True)
     return df
 
-
-def get_all_states(percent_b_states_values, close_sma_ratio_states_value,mrdr_value):
+def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values):
     '''
     Combine all the states from the discretized 
     norm_adj_close, norm_close_sma_ratio columns.
@@ -320,10 +295,11 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value,mrdr_va
     states = []
     for c, _ in close_sma_ratio_states_value.items():
         for b, _ in percent_b_states_values.items():
-            for m, _ in mrdr_value.items():
-                state = str(c) + str(b) + str(m)
-                states.append(str(state))
-
+          for m, _ in cash_states_values.items():
+            for s, _ in shares_states_values.items(): 
+              state =  str(c) + str(b) + str(m) + str(s)
+              states.append(str(state))
+
     return states
 
 def weighted_average_and_normalize(qtable,state_history,state_num,quantile_length):

diff --git a/src/sim.py b/src/sim.py
@@ -16,7 +16,7 @@
 #TODO: make this a single function call
 ticker = 'JPM'
 
-np.random.seed(0)
+np.random.seed(1000)
 start = '2017-01-01'
 end = '2019-12-31'
 
@@ -26,10 +26,14 @@
 q, bb_states_value, SMA_ratio_quantiles, MRDR_values = tu.trainqlearner(start_date, end_date, ticker)
 q.columns = ['HOLD', 'BUY', 'SELL']
 bb_ = list(bb_states_value.values())
+
+print(bb_)
+
 sma_ = list(SMA_ratio_quantiles.values())
 mrdr_ = list(MRDR_values.values())
 
 # Fixing the range problem
+
 # q.iloc[0] = q.iloc[0] * 1e-16
 #nq = (q - q.mean()) / q.std()
 nq=q
@@ -297,6 +301,90 @@ def rule_based(stock_table,money,inc, original_shares,commission):
     return results
 
 
+# function to buy stock every day
+def buy_always(stock_table,money,inc,original_shares,commission):
+    '''
+    Enacts the buy_always benchmark: BUY `inc` shares on every day cash allows, otherwise HOLD
+
+    Inputs
+    stock_table: daily stock or portfolio values (used via .values/.iloc/.index, presumably a Pandas series)
+    money: original cash held
+    inc: number of shares added per BUY
+    original_shares: original number of shares held
+    commission: amount subtracted from cash on every BUY
+
+    Output
+    results: dictionary holding...
+        *one entry each, indexed like stock_table, for*
+        final_vals: daily values of portfolio
+        actions: daily actions taken ("BUY" or "HOLD"; this strategy never emits "SELL")
+        shares: daily number of shares of stock held
+        cash: daily amount of cash held
+        *additionally*
+        markov: action-to-action transition counts, divided by their row sums and rounded to 2 places
+        qtable, state_history: None (do not apply to this strategy)
+    '''
+
+    # record original value
+    original_val = money + (stock_table.values[0]*original_shares) # initial cash plus initial shares at the day-0 value
+
+    # generate table of returns - NOTE(review): `ret` is not read anywhere below; confirm it can be dropped
+    ret = returns(stock_table)
+
+    # create actions table (day 0 is recorded as HOLD)
+    actions = ['HOLD']
+
+    # create shares table (copy of stock_table, overwritten day by day in the loop)
+    shares = stock_table.copy()
+    shares.iloc[0] = original_shares
+
+    # create markov transition matrix (3x3 zeros; action_list is defined outside this function)
+    markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
+
+    # create cash table (copy of stock_table, overwritten day by day in the loop)
+    cash = stock_table.copy()
+    cash.iloc[0] = money
+
+    # calculate daily portfolio value
+    final_vals = stock_table.copy()
+    final_vals.iloc[0] = original_val
+
+    # iterate through days
+    for i in range(1,stock_table.shape[0]):
+        j = i-1 # previous day
+        cur_cash = cash.values[j] # cash carried over from the previous day
+        cur_shares = shares.values[j] # shares carried over from the previous day
+        final_vals.iloc[i] = cur_cash + (cur_shares*stock_table.values[i]) # previous day's cash and shares valued at day i
+        cur_price = stock_table.values[j] # purchases are priced at the previous day's value
+
+        # if you can't buy, hold (commission is not part of this check)
+        if cur_cash < (cur_price*inc):
+            act = 'HOLD'
+        # else buy
+        else:
+            act = 'BUY'
+
+        # take action
+        if act == 'HOLD':
+            cash.iloc[i] = cash.values[j]
+            shares.iloc[i] = shares.values[j]
+        if act == 'BUY':
+            cash.iloc[i] = cash.values[j] - (inc*cur_price) - commission
+            shares.iloc[i] = shares.values[j] + inc
+
+        actions += [act]
+
+        # increment markov (row = previous day's action, column = today's action)
+        markov.loc[actions[j],actions[i]] +=1
+
+    actions = pd.Series(actions,index=stock_table.index)
+
+    # normalize markov - NOTE(review): a row with no transitions has row sum 0, presumably giving NaN
+    markov = markov.divide(markov.sum(axis=1),axis=0).round(2)
+
+    results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov,'state_history':None}
+    return results
+
 # function to choose action based on OLS of returns looking back to trading days t-6 to t-1
 def ols(stock_table,money,inc, original_shares,commission):
     '''
@@ -412,7 +500,7 @@ def ols(stock_table,money,inc, original_shares,commission):
     return results
 
 # def qlearner(stock_table,money,inc, original_shares,qtable=ql[0], BB_quantiles=ql[1], SMA_quantiles=ql[2],window=window):
-def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, MRDR_quantiles=mrdr_, window=3): # defining defaults here prevents need for args to be passed in return_stats function
+def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, MRDR_quantiles=mrdr_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
     '''
     Enacts qlearning
 
@@ -439,6 +527,8 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
     '''
 
     # record original value
+    print(stock_table[1])
+
     original_val = money + (stock_table.values[0]*original_shares) # initial cash
 
     # generate table of returns
@@ -479,14 +569,19 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
         if i > window: # if we have enough of a lookback window to calculate stats
 
             # find yesterday's final bollinger band value
-            bb = d.get_bollinger_bands(stock_table.iloc[:i],window).iloc[j]
+            upper, lower = d.get_upper_lower_bands(stock_table.iloc[:i], window)
+            bb = ((stock_table.iloc[:i] - lower) * 100 / (upper - lower)).iloc[j]
+
+
+            #bb = d.get_bollinger_bands(stock_table.iloc[:i],window).iloc[j]
 
             # find yesterday's final bollinger band quantile
             if bb != float('inf'):
                 bbq = np.argwhere(np.where(BB_quantiles>bb,1,0))[0][0]
             else:
                 bbq = len(BB_quantiles) - 1
 
+
             # find current SMA value
             sma = d.get_adj_close_sma_ratio(stock_table.iloc[:i],window).iloc[j]
 
@@ -506,7 +601,7 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
                 mrq = len(MRDR_quantiles) - 1
 
             # find state based on these two pieces of information
-            state = str(bbq) + str(smq) + str(mrq)
+            state =  str(smq) + str(bbq) + str(mrq)
 
             # locate *optimal* action from Q table, which we will then examine to see if it's possible
 #             print("STATE: ", state, str(bbq), str(smq))
@@ -532,7 +627,7 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
 
         # if you can't buy or sell, hold
         if cur_shares < inc and cur_cash < (cur_price*inc):
-            cur_act = 'HOLD'
+            act = 'HOLD'
 
         # if you can't sell, but you can buy... buy if it makes sense, or hold if it doesn't
         elif cur_shares < inc:
@@ -572,12 +667,12 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
     return results
 
 # function to return stats and graphs
-def return_stats(stock='jpm',
+def return_stats(stock='aapl',
                  commission = 2,
                  money=100000,
                  #inc=10,- can read this argument and change code below if doing absolute share-based
                  #original_shares=100, - can read this argument and change code below if doing absolute share-based
-                 policies=[hold,random_action,rule_based,ols,qlearner]):
+                 policies=[hold,random_action,rule_based,ols,buy_always,qlearner]):
 
     '''
     Enacts every strategy and provides summary statistics and graphs
@@ -628,7 +723,7 @@ def return_stats(stock='jpm',
             quantile_length = len(results[policy.__name__]['BB_quantiles'])
             qtab = results[policy.__name__]['qtable']
 
-            qtab_bb = weighted_average_and_normalize(qtab, state_history, 0, quantile_length)
+            qtab_bb = weighted_average_and_normalize(qtab, state_history, 1, quantile_length)
             qtab_bb = qtab_bb.iloc[::-1] # reverse order of rows for visualization purposes - now biggest value will be on top
             qtab_bb.index = np.round(np.flip(np.array(results[policy.__name__]['BB_quantiles'])),5) # define index as bb quantiles, reversing quantile order in kind so biggest value is first
 
@@ -646,7 +741,7 @@ def return_stats(stock='jpm',
 
             # marginalize over SMA
             # TODO - determine if this mean was taken correctly
-            qtab_sma = weighted_average_and_normalize(qtab, state_history, 1, quantile_length)
+            qtab_sma = weighted_average_and_normalize(qtab, state_history, 0, quantile_length)
             qtab_sma = qtab_sma.iloc[::-1]
             qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SMA_quantiles'])),5)