From e19b89ed518c727d75287d5dc71831fb3d4aa034 Mon Sep 17 00:00:00 2001
From: Mariem Ayadi <mayadi@smith.edu>
Date: Mon, 7 Dec 2020 00:02:57 -0500
Subject: [PATCH] #55 Full integration post debugging and sync with test
 section

---
 src/data_process.py       |  95 +++++----
 src/sim.py                | 405 +++++++++++++++++++-------------------
 src/trainqlearner_util.py | 320 +++++-------------------------
 3 files changed, 307 insertions(+), 513 deletions(-)

diff --git a/src/data_process.py b/src/data_process.py
index 583f150..0a9f31f 100644
--- a/src/data_process.py
+++ b/src/data_process.py
@@ -111,23 +111,6 @@ def get_stock_data(symbol, start, end):
     return df
 
 
-def get_bollinger_bands(values, window):
-    '''
-    Return upper and lower Bollinger Bands.
-    INPUTS:
-    values(pandas series)
-    window(int): time period to consider
-    OUTPUS:
-    band_width(pandas series)
-    '''
-    #  rolling mean
-    rm = values.rolling(window=window).mean()
-    rstd = values.rolling(window=window).std()
-
-    band_width = rm / rstd
-    return band_width.apply(lambda x: round(x, 5))
-
-
 def get_adj_close_sma_ratio(values, window):
     '''
     Return the ratio of adjusted closing value to the simple moving average.
@@ -142,7 +125,7 @@ def get_adj_close_sma_ratio(values, window):
     return ratio.apply(lambda x: round(x, 5))
 
 
-def discretize(values, num_states=4):
+def discretize(values, num_states=9):
     '''
     Convert continuous values to integer state
     Inputs:
@@ -200,7 +183,7 @@ def value_to_state(value, states_value):
         return 'value out of range'
 
 
-def create_df(df, window=45):
+def create_df(df, window=5):
     '''
     Create a dataframe with the normalized predictors
     norm_bb_width, norm_adj_close, norm_close_sma_ratio
@@ -260,7 +243,7 @@ def get_states(df):
     return percent_b_states_values, close_sma_ratio_states_value
 
 
-def create_state_df(df, bb_states_value, close_sma_ratio_states_value):
+def create_state_df(df, percent_b_states_values, close_sma_ratio_states_value):
     '''
     Add a new column to hold the state information to the dataframe
     Inputs:
@@ -271,8 +254,6 @@ def create_state_df(df, bb_states_value, close_sma_ratio_states_value):
     Output:
     df(dataframe)
     '''
-    percent_b_states_values, close_sma_ratio_states_value = get_states(df)
-
     #df['norm_bb_width_state'] = df['norm_bb_width'].apply(lambda x : value_to_state(x, bb_states_value)) #2 
     df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(lambda x : value_to_state(x, close_sma_ratio_states_value))
     df['percent_b_state'] = df['percent_b'].apply(lambda x : value_to_state(x, percent_b_states_values))
@@ -297,17 +278,47 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_s
     states = []
     for c, _ in close_sma_ratio_states_value.items():
         for b, _ in percent_b_states_values.items():
-          for m, _ in cash_states_values.items():
-            for s, _ in shares_states_values.items(): 
-              state =  str(c) + str(b) + str(m) + str(s)
-              states.append(str(state))
-    
+            for m, _ in cash_states_values.items():
+                for s, _ in shares_states_values.items():
+                    state = str(c) + str(b) + str(m) + str(s)
+                    states.append(str(state))
+
     return states
 
+# def weighted_average_and_normalize(qtable,state_history,state_num,quantile_length):
+#     '''
+#     takes a q table and does a weighted average group by given the input state_number (what digit number it is in the state)
+#
+#     Inputs:
+#     qtable: the qtable (DataFrame)
+#     state_history: the state history (Series)
+#     state_num: the number digit that indicates the state
+#     quantile_length: the number of quantiles we built this out with
+#     '''
+#     qtab_2 = pd.merge(qtable,pd.Series(state_history,name='state_history'),'inner',left_index=True,right_index=True)
+#
+#     sh = qtab_2['state_history']
+#     qtab_2 = qtab_2.drop(columns=['state_history']).multiply(qtab_2['state_history'],axis=0)
+#
+#     qtab_2 = pd.merge(qtab_2,sh,'inner',left_index=True,right_index=True)
+#
+#     qtab_2['state'] = qtab_2.index.str.slice(state_num,state_num+1)
+#
+#     qtab_3 = qtab_2.groupby('state').sum()
+#
+#     qtab_4 = qtab_3.divide(qtab_3['state_history'],axis=0).drop(columns='state_history')
+#
+#     qtab_5 = qtab_4.reindex([str(i) for i in range(quantile_length)])
+#
+#     #normalize by max
+#     qtab_6 = qtab_5.divide(qtab_5.max(axis=1),axis=0)
+#
+#     return qtab_6
+
 def weighted_average_and_normalize(qtable,state_history,state_num,quantile_length):
     '''
     takes a q table and does a weighted average group by given the input state_number (what digit number it is in the state)
-    
+
     Inputs:
     qtable: the qtable (DataFrame)
     state_history: the state history (Series)
@@ -315,24 +326,26 @@ def weighted_average_and_normalize(qtable,state_history,state_num,quantile_lengt
     quantile_length: the number of quantiles we built this out with
     '''
     qtab_2 = pd.merge(qtable,pd.Series(state_history,name='state_history'),'inner',left_index=True,right_index=True)
-    
+
+    # reverse normalization: qtab_2['state_history'] = 1/qtab_2['state_history']
+
     sh = qtab_2['state_history']
     qtab_2 = qtab_2.drop(columns=['state_history']).multiply(qtab_2['state_history'],axis=0)
-    
+
     qtab_2 = pd.merge(qtab_2,sh,'inner',left_index=True,right_index=True)
-    
+
     qtab_2['state'] = qtab_2.index.str.slice(state_num,state_num+1)
-    
-    qtab_3 = qtab_2.groupby('state').sum()
-    
-    qtab_4 = qtab_3.divide(qtab_3['state_history'],axis=0).drop(columns='state_history')
-    
+
+
+    qtab_3 = qtab_2.groupby('state').sum().drop(columns='state_history')
+
+
+    qtab_4 = qtab_3.divide(qtab_3.abs().sum(axis=1),axis=0)
+
+
     qtab_5 = qtab_4.reindex([str(i) for i in range(quantile_length)])
-    
-    #normalize by max
-    qtab_6 = qtab_5.divide(qtab_5.max(axis=1),axis=0)
-    
-    return qtab_6
+
+    return qtab_5
     
     
                                            
diff --git a/src/sim.py b/src/sim.py
index 96f9289..0bfcd42 100644
--- a/src/sim.py
+++ b/src/sim.py
@@ -23,28 +23,27 @@
 start_date = dt.datetime(2007, 1, 1)
 end_date = dt.datetime(2016, 12, 31)
 
-print("START")
-q, bb_states_value, SMA_ratio_quantiles, cash_quantiles, holdings_quantiles = tu.trainqlearner(start_date, end_date, ticker)
-print("END")
-q.columns = ['HOLD', 'BUY', 'SELL']
-bb_ = list(bb_states_value.values())
-sma_ = list(SMA_ratio_quantiles.values())
 
-# Fixing the range problem
+q, train_actions_history, train_returns_since_entry, percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values = tu.trainqlearner(ticker, start_date, end_date, window = 5, gamma = 0.95, episodes = 100, sh = 20)
 
-# q.iloc[0] = q.iloc[0] * 1e-16
-#nq = (q - q.mean()) / q.std()
+q.columns = ['HOLD', 'BUY', 'SELL']
 nq=q
 nq.columns = ['HOLD', 'BUY', 'SELL']
 
 action_list = ['BUY','HOLD','SELL']
 nq = nq[action_list]
 
-# nq = nq.div(nq.abs().max(axis=1), axis=0) forgoing normalization
+test_df = d.get_stock_data(ticker, start, end)
+test_df = d.create_df(test_df, 5)
+test_df = d.create_state_df(test_df, percent_b_states_values , close_sma_ratio_states_value)
+temp = test_df.iloc[:-1, :-1]
+test_df = np.array(test_df[['Adj Close', 'state']])
+
+
+
 
 
-# function to hold every day
-def hold(stock_table,money,inc,original_shares,commission,):
+def hold(stock_table,money,inc,original_shares,commission):
     '''
     Enacts hold-every-day strategy
 
@@ -68,6 +67,7 @@ def hold(stock_table,money,inc,original_shares,commission,):
 
     # calculate daily returns
     ret = returns(stock_table)
+    original_shares = original_shares + inc
 
     # dummy calculations to reset to initialize return calculations
     # what this does is just sets the first entry of the returns Series to total value of stock held originally
@@ -89,10 +89,10 @@ def hold(stock_table,money,inc,original_shares,commission,):
 
     # add original cash to this
     final_vals += money
-    
+
     # create markov transition matrix
     markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
-    
+
     markov.loc['HOLD','HOLD']=1
 
     results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov,'state_history': None}
@@ -134,7 +134,7 @@ def random_action(stock_table,money,inc,original_shares,commission):
     # create shares table
     shares = stock_table.copy()
     shares.iloc[0] = original_shares
-    
+
     # create markov transition matrix
     markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
 
@@ -182,12 +182,12 @@ def random_action(stock_table,money,inc,original_shares,commission):
             shares.iloc[i] = shares.values[j] - inc
 
         actions += [act]
-        
+
         # increment markov
         markov.loc[actions[j],actions[i]] +=1
 
     actions = pd.Series(actions,index=stock_table.index)
-    
+
     # normalize markov
     markov = markov.divide(markov.sum(axis=1),axis=0).round(2)
 
@@ -234,7 +234,7 @@ def rule_based(stock_table,money,inc, original_shares,commission):
     # create cash table
     cash = stock_table.copy()
     cash.iloc[0] = money
-    
+
     # create markov transition matrix
     markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
 
@@ -286,15 +286,15 @@ def rule_based(stock_table,money,inc, original_shares,commission):
             shares.iloc[i] = shares.values[j] - inc
 
         actions += [act]
-        
+
         # increment markov
         markov.loc[actions[j],actions[i]] +=1
 
     actions = pd.Series(actions,index=stock_table.index)
-    
+
     # normalize markov
     markov = markov.divide(markov.sum(axis=1),axis=0).round(2)
-    
+
     results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov, 'state_history': None}
     return results
 
@@ -334,7 +334,7 @@ def buy_always(stock_table,money,inc,original_shares,commission):
     # create shares table
     shares = stock_table.copy()
     shares.iloc[0] = original_shares
-    
+
     # create markov transition matrix
     markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
 
@@ -357,8 +357,6 @@ def buy_always(stock_table,money,inc,original_shares,commission):
         # if you can't buy, hold
         if cur_cash < (cur_price*inc):
             act = 'HOLD'
-	
-	# else buy
         else:
             act = 'BUY'
 
@@ -369,14 +367,14 @@ def buy_always(stock_table,money,inc,original_shares,commission):
         if act == 'BUY':
             cash.iloc[i] = cash.values[j] - (inc*cur_price) - commission
             shares.iloc[i] = shares.values[j] + inc
-  
+
         actions += [act]
-        
+
         # increment markov
         markov.loc[actions[j],actions[i]] +=1
 
     actions = pd.Series(actions,index=stock_table.index)
-    
+
     # normalize markov
     markov = markov.divide(markov.sum(axis=1),axis=0).round(2)
 
@@ -429,13 +427,13 @@ def ols(stock_table,money,inc, original_shares,commission):
     # calculate daily portfolio value
     final_vals = stock_table.copy()
     final_vals.iloc[0] = original_val
-    
+
     # create markov transition matrix
     markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
 
     # iterate through days
     for i in range(1,stock_table.shape[0]):
-    
+
         j = i-1 # last day
         cur_cash = cash.values[j] # current cash
         cur_shares = shares.values[j] # current shares
@@ -488,185 +486,124 @@ def ols(stock_table,money,inc, original_shares,commission):
 
         # increment markov
         markov.loc[actions[j],actions[i]] +=1
-        
+
     actions = pd.Series(actions,index=stock_table.index)
-    
+
     # normalize markov
     markov = markov.divide(markov.sum(axis=1),axis=0).round(2)
-    
+
     results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov,'state_history': None}
     return results
 
 # def qlearner(stock_table,money,inc, original_shares,qtable=ql[0], BB_quantiles=ql[1], SMA_quantiles=ql[2],window=window):
-def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
-    '''
-    Enacts qlearning
-
-    Inputs
-    stock_table: list of daily stock or portfolio values
-    money: original cash held
-    inc: increment of buy/sell permitted
-    original_shares: original number of shares held
-    qtable: input qtable (Pandas dataframe, columns are "BUY SELL HOLD", rows are states)
-    BB_quantiles: quantiles of Bollinger bands
-    SMA_quantiles: quantiles of SMA
-    window: lookback window
-
-    Output
-    results: dictionary holding...
-        *one Pandas series each (key/Series names are identical) for*
-        final_vals: final daily values of portfolio
-        actions: daily actions taken ("BUY" "SELL" "HOLD")
-        shares: daily number of shares of stock held
-        cash: daily amount of cash held
 
-        *additionally*
-        qtable: pandas dataframe formatted the same as the input dataframe (will be identical)
+def qlearner(stock_table,money,inc, original_shares, commission, q_table = nq, test_data = test_df,  percent_b_states_values = percent_b_states_values, close_sma_ratio_states_value = close_sma_ratio_states_value, cash_states_values = cash_states_values, shares_states_values = shares_states_values, temp = temp):
     '''
-
-    # record original value
-    print(stock_table[1])
-
-    original_val = money + (stock_table.values[0]*original_shares) # initial cash
-
-    # generate table of returns
-    ret = returns(stock_table)
-
-    # create actions table
-    actions = ['HOLD']
-
-    # create shares table
-    shares = stock_table.copy()
-    shares.iloc[0] = original_shares
-
-    # create cash table
-    cash = stock_table.copy()
-    cash.iloc[0] = money
-
-    # calculate daily portfolio value
-    final_vals = stock_table.copy()
-    final_vals.iloc[0] = original_val
-
+    Evaluate the Q-table
+    Inputs:
+    test_data(dataframe)
+    q(dataframe): trained Q-table
+    Output:
+    actions_history(dict): has everydays' actions and close price
+    returns_since_entry(list): contains every day's return since entry
+    '''
+    current_portfolio_value = []
+    cash = money
+    num_shares = original_shares
+    curr_cash = []
+    curr_shares = []
+    curr_cash_s = []
+    curr_shares_s = []
+    act_list = []
+    cash_list = []
+    shares_list = []
+    final_states = []
     state_history = {}
+    actions_history =[]
+    for i, val in enumerate(test_data):
+        current_adj_close, state = val
+        try:
+            next_adj_close, next_state = test_data[i + 1]
+        except IndexError:  # last row has no next-day price to value the portfolio with
+            print('End of data! Done!')
+            break
 
-    # create markov transition matrix
-    markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)
-    
-    # define baseline for mrdr
-    baseline = read_stock('^GSPC',start,end)
-
-    # iterate through days
-    for i in range(1,stock_table.shape[0]):
-    
-        j = i-1 # last day
-        cur_cash = cash.values[j] # current cash
-        cur_shares = shares.values[j] # current shares
-        final_vals.iloc[i] = cur_cash + (cur_shares*stock_table.values[i]) # end of day portfolio value
-        cur_price = stock_table.values[j]
+        current_cash_state = d.value_to_state(cash, cash_states_values)
+        current_share_state = d.value_to_state(num_shares, shares_states_values)
+        state = state + current_cash_state + current_share_state
 
-        if i > window: # if we have enough of a lookback window to calculate stats
+        final_states.append(state)
+        curr_cash.append(cash)
+        curr_shares.append(num_shares)
+        curr_cash_s.append(current_cash_state)
+        curr_shares_s.append(current_share_state)
 
-            # find yesterday's final bollinger band value
-            upper, lower = d.get_upper_lower_bands(stock_table.iloc[:i], window)
-            bb = ((stock_table.iloc[:i] - lower) * 100 / (upper - lower)).iloc[j]
+        try:
+            state_history[state] += 1
+        except KeyError:
+            state_history[state] = 1
 
+        action = tu.act(state, q_table, threshold=0, actions_size=3)
 
-            #bb = d.get_bollinger_bands(stock_table.iloc[:i],window).iloc[j]
 
-            # find yesterday's final bollinger band quantile
-            if bb != float('inf'):
-                bbq = np.argwhere(np.where(BB_quantiles>bb,1,0))[0][0]
+        if action == 0:  # buy
+            if cash > inc * current_adj_close:
+                next_cash = cash - inc * current_adj_close
+                num_shares += inc
+                cash = next_cash
             else:
-                bbq = len(BB_quantiles) - 1
-
+                action = 1
 
-            # find current SMA value
-            sma = d.get_adj_close_sma_ratio(stock_table.iloc[:i],window).iloc[j]
-
-            # find current SMA quantile
-            if sma != float('inf'):
-                smq = np.argwhere(np.where(SMA_quantiles>sma,1,0))[0][0]
-            else:
-                smq = len(SMA_quantiles) - 1
-                
-            # find current SMA value
-            #mra = d.get_mrdr(stock_table.iloc[:i],baseline).iloc[-1]
-
-            # find current SMA quantile
-            # if mra != float('inf'):
-            #     mrq = np.argwhere(np.where(MRDR_quantiles>mra,1,0))[0][0]
-            # else:
-            #     mrq = len(MRDR_quantiles) - 1
-
-            # find state based on these two pieces of information
-            #state =  str(smq) + str(bbq) + str(mrq)
-            state =  str(smq) + str(bbq)
-
-            # locate *optimal* action from Q table, which we will then examine to see if it's possible
-#             print("STATE: ", state, str(bbq), str(smq))
-#             print(qtable.loc[state])
-            cur_act = qtable.loc[state].idxmax()
-
-            #maintain a score of state visited
-            '''
-            if state_history.get(state, None) is None:
-                state_history[state] = 0
+        if action == 2:  # sell
+            if num_shares >= inc:  # need a full lot of `inc` shares; `> 0` could drive holdings negative
+                next_cash = cash + inc * current_adj_close
+                num_shares -= inc
+                cash = next_cash
             else:
-                state_history[state]+=1
-            '''
+                action = 1
 
-            try:
-                state_history[state] += 1
-            except KeyError:
-                state_history[state] = 1
+        if action == 0:
+            act_list.append('BUY')
+        elif action == 2:
+            act_list.append('SELL')
+        else:
+            act_list.append('HOLD')
 
+        actions_history.append((i, current_adj_close, action))
 
-        else: # if we're too early to have a full lookback window
-            cur_act = 'HOLD'
+        cash_list.append(cash)
+        shares_list.append(num_shares)
+        current_portfolio_value.append(cash + num_shares * next_adj_close)
 
-        # if you can't buy or sell, hold
-        if cur_shares < inc and cur_cash < (cur_price*inc):
-            act = 'HOLD'
 
-        # if you can't sell, but you can buy... buy if it makes sense, or hold if it doesn't
-        elif cur_shares < inc:
-            act = 'BUY' if cur_act == 'BUY' else 'HOLD'
 
-        # if you can't buy, but you can sell... sell if it makes sense, or hold if it doesn't
-        elif cur_cash < (cur_price*inc):
-            act = 'SELL' if cur_act == 'SELL' else 'HOLD'
+    markov = pd.DataFrame(np.zeros((3, 3)), index=action_list, columns=action_list)
+    for i in range(1,len(act_list)):
+        markov.loc[act_list[i-1],act_list[i]] +=1
 
-        # otherwise do whatever makes sense
-        else:
-            act = cur_act
+    temp['cash'] = curr_cash
+    temp['cash_state'] = curr_cash_s
+    temp['shares'] = curr_shares
+    temp['shares_state'] = curr_shares_s
+    temp['state'] = final_states
+    temp.to_csv('./data/viz_data.csv')
 
-        # take action
-        if act == 'HOLD':
-            cash.iloc[i] = cash.values[j]
-            shares.iloc[i] = shares.values[j]
-        if act == 'BUY':
-            cash.iloc[i] = cash.values[j] - (inc*cur_price) - commission
-            shares.iloc[i] = shares.values[j] + inc
-        if act == 'SELL':
-            cash.iloc[i] = cash.values[j] + (inc*cur_price) - commission
-            shares.iloc[i] = shares.values[j] - inc
-
-        actions += [act]
 
-        # increment markov
-        markov.loc[actions[j],actions[i]] +=1
-        
-        
-    # normalize markov
-    markov = markov.divide(markov.sum(axis=1),axis=0).round(2)
-    
-    actions = pd.Series(actions,index=stock_table.index)
+    actions = pd.Series(act_list, index=stock_table.index)
+    f_shares = pd.Series(shares_list, index=stock_table.index)
+    f_cash = pd.Series(cash_list, index=stock_table.index)
+    final_vals = pd.Series(current_portfolio_value, index=stock_table.index)
 
-    results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':qtable, 'state_history':pd.Series(state_history),'BB_quantiles':BB_quantiles,'SMA_quantiles':SMA_quantiles, 'markov':markov}
+    results = {'final_vals': final_vals, 'actions': actions, 'shares': f_shares, 'cash': f_cash, 'qtable': q_table,
+               'state_history': pd.Series(state_history), 'BB_quantiles': list(percent_b_states_values.values()),
+               'SMA_quantiles': list(close_sma_ratio_states_value.values()),
+               'CASH_quantiles': list(cash_states_values.values()), 'SHARE_quantiles': list(shares_states_values.values()),
+               'markov': markov, 'actions_history' : actions_history}
     return results
 
+
 # function to return stats and graphs
-def return_stats(stock='aapl',
+def return_stats(stock='jpm',
                  commission = 2,
                  money=100000,
                  #inc=10,- can read this argument and change code below if doing absolute share-based
@@ -687,24 +624,29 @@ def return_stats(stock='aapl',
 
     Provides numerous summary statistics and visualizations
     '''
-    
+
     original_money = money
 
     # generate stock table
     stock_table = read_stock(stock,start,end)
 
-    
+
     # note stock name
     stock_name = stock.upper()
 
     # approximate 50/50 split in money-stock
-    original_shares = round(money / 2 / stock_table.values[0])
+    original_shares = 0  # start with no shares held; the 50/50 cash/stock split is no longer applied
 
     # recalculate money accordingly
-    money -= (stock_table.values[0]*original_shares)
+
+    money = original_money
 
     # make share increment about 1% of original share holdings
-    inc = m.ceil(original_shares / 100)
+    inc = 20  # fixed lot size per trade; NOTE(review): same value as sh=20 passed to tu.trainqlearner - confirm they must match
+
+    stock_table = stock_table[4:]  # drop the first 4 rows; presumably aligns with the window=5 features in test_df - TODO confirm
+
+
 
     # generate results
     results = {policy.__name__:policy(stock_table,
@@ -713,6 +655,40 @@ def return_stats(stock='aapl',
                                       original_shares = original_shares,
                                      commission = commission) for policy in policies}
 
+    actions_history = results['qlearner']['actions_history']
+
+    # action codes emitted by qlearner: 0 = BUY, 1 = HOLD, 2 = SELL
+    days, prices, actions = [], [], []
+    for day, price, action in actions_history:
+        days.append(day)
+        prices.append(price)
+        actions.append(action)
+    buy_d, buy_p, hold_d, hold_p, sell_d, sell_p = [], [], [], [], [], []
+    for day, price, action in actions_history:
+        if action == 0:
+            buy_d.append(day)
+            buy_p.append(price)
+        elif action == 1:
+            hold_d.append(day)
+            hold_p.append(price)
+        elif action == 2:
+            sell_d.append(day)
+            sell_p.append(price)
+    buys = pd.DataFrame(list(zip(buy_d, buy_p)), columns=['Date', 'Adj Close'])
+    sells = pd.DataFrame(list(zip(sell_d, sell_p)), columns=['Date', 'Adj Close'])
+    holds = pd.DataFrame(list(zip(hold_d, hold_p)), columns=['Date', 'Adj Close'])
+
+    buys.to_csv('./data/buy_data.csv')
+    sells.to_csv('./data/sell_data.csv')
+    holds.to_csv('./data/hold_data.csv')
+
+
+
+
+
+
+
+
     # plot qtables only for qlearner (or any other strategies with Q table)
     for policy in policies:
         if results[policy.__name__]['qtable'] is not None: #don't try to plot Q tables for benchmark strategies
@@ -721,7 +697,7 @@ def return_stats(stock='aapl',
             state_history = results[policy.__name__]['state_history']
             quantile_length = len(results[policy.__name__]['BB_quantiles'])
             qtab = results[policy.__name__]['qtable']
-            
+
             qtab_bb = weighted_average_and_normalize(qtab, state_history, 1, quantile_length)
             qtab_bb = qtab_bb.iloc[::-1] # reverse order of rows for visualization purposes - now biggest value will be on top
             qtab_bb.index = np.round(np.flip(np.array(results[policy.__name__]['BB_quantiles'])),5) # define index as bb quantiles, reversing quantile order in kind so biggest value is first
@@ -740,9 +716,10 @@ def return_stats(stock='aapl',
 
             # marginalize over SMA
             # TODO - determine if this mean was taken correctly
+            quantile_length = len(results[policy.__name__]['SMA_quantiles'])
             qtab_sma = weighted_average_and_normalize(qtab, state_history, 0, quantile_length)
             qtab_sma = qtab_sma.iloc[::-1]
-            qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SMA_quantiles'])),5)
+            qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SMA_quantiles'])),10)
 
             plt.figure(figsize=(9,7))
             fig = heatmap(qtab_sma,cmap='Blues')
@@ -753,23 +730,40 @@ def return_stats(stock='aapl',
             plt.gca().tick_params(axis='x',bottom=False,left=False)
             plt.gca().tick_params(axis='y',bottom=False,left=False)
             plt.show(fig)
-            
-            # marginalize over MRDR
-            # TODO - determine if this mean was taken correctly
-            # qtab_mrdr = weighted_average_and_normalize(qtab, state_history, 2, quantile_length)
-            # qtab_mrdr = qtab_mrdr.iloc[::-1]
-            # qtab_mrdr.index = np.round(np.flip(np.array(results[policy.__name__]['MRDR_quantiles'])),5)
-            
-            # plt.figure(figsize=(9,7))
-            # fig = heatmap(qtab_mrdr,cmap='Blues')
-            # plt.title('Market Relative Daily Return Q-Table',size=16)
-            # plt.gca().hlines([i+1 for i in range(len(qtab_mrdr.index))],xmin=0,xmax=10,linewidth=10,color='white')
-            # plt.xticks(fontsize=15)
-            # plt.yticks(fontsize=14,rotation=0)
-            # plt.gca().tick_params(axis='x',bottom=False,left=False)
-            # plt.gca().tick_params(axis='y',bottom=False,left=False)
-            # plt.show(fig)
-            
+
+            #CASH
+            quantile_length = len(results[policy.__name__]['CASH_quantiles'])
+            qtab_sma = weighted_average_and_normalize(qtab, state_history, 2, quantile_length)
+            qtab_sma = qtab_sma.iloc[::-1]
+            qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['CASH_quantiles'])), 10)
+
+            plt.figure(figsize=(9, 7))
+            fig = heatmap(qtab_sma, cmap='Blues')
+            plt.title('CASH Q-Table', size=16)
+            plt.gca().hlines([i + 1 for i in range(len(qtab_sma.index))], xmin=0, xmax=10, linewidth=10, color='white')
+            plt.xticks(fontsize=15)
+            plt.yticks(fontsize=14, rotation=0)
+            plt.gca().tick_params(axis='x', bottom=False, left=False)
+            plt.gca().tick_params(axis='y', bottom=False, left=False)
+            plt.show(fig)
+
+            #SHARES
+            quantile_length = len(results[policy.__name__]['SHARE_quantiles'])
+            qtab_sma = weighted_average_and_normalize(qtab, state_history, 3, quantile_length)
+            qtab_sma = qtab_sma.iloc[::-1]
+            qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SHARE_quantiles'])), 10)
+
+            plt.figure(figsize=(9, 7))
+            fig = heatmap(qtab_sma, cmap='Blues')
+            plt.title('SHARE Q-Table', size=16)
+            plt.gca().hlines([i + 1 for i in range(len(qtab_sma.index))], xmin=0, xmax=10, linewidth=10, color='white')
+            plt.xticks(fontsize=15)
+            plt.yticks(fontsize=14, rotation=0)
+            plt.gca().tick_params(axis='x', bottom=False, left=False)
+            plt.gca().tick_params(axis='y', bottom=False, left=False)
+            plt.show(fig)
+
+
 
     # get markov transition models
     for policy in policies:
@@ -784,8 +778,8 @@ def return_stats(stock='aapl',
         plt.gca().tick_params(axis='y',bottom=False,left=False)
         plt.gca().hlines([1,2],xmin=0,xmax=10,linewidth=10,color='white')
         plt.show(fig)
-        
-        
+
+
     # plot daily portfolio values
     plt.figure(figsize=(14,8))
     for policy in policies:
@@ -828,7 +822,8 @@ def return_stats(stock='aapl',
         try:
             del dic['BB_quantiles']
             del dic['SMA_quantiles']
-            # del dic['MRDR_quantiles']
+            del dic['CASH_quantiles']
+            del dic['SHARE_quantiles']
         except:
             pass
         df = pd.DataFrame(dic)
@@ -877,7 +872,7 @@ def return_stats(stock='aapl',
     rets = {policy:returns(results[policy.__name__]['final_vals']) for policy in policies}
 
     # generate risk_free return for sharpe ratio - five-year treasury yield
-    rfs = returns(read_stock('^FVX'))
+    rfs = returns(read_stock('^FVX')[4:])
 
     # find common indecies between stock tables and treasury yields
     rfn = set(stock_table.index).intersection(set(rfs.index))
@@ -887,7 +882,7 @@ def return_stats(stock='aapl',
     rfi = rfr.index
 
     # generate baseline return for information ratio - s&p 500
-    bls = returns(read_stock('^GSPC')).values
+    bls = returns(read_stock('^GSPC')[4:]).values
 
     # print summary stats for daily returns
     for policy in policies:
diff --git a/src/trainqlearner_util.py b/src/trainqlearner_util.py
index aef5d4f..ec0ecf1 100644
--- a/src/trainqlearner_util.py
+++ b/src/trainqlearner_util.py
@@ -48,23 +48,6 @@ def act(state, q_mat, threshold, actions_size=3):
     return action
 
 
-def get_return_since_entry(bought_history, current_adj_close):
-    '''
-    Calculate the returns of current share holdings.
-    Inputs:
-    bought_history(list)
-    current_adj_close(float)
-    current_day(int)
-    Output:
-    return_since_entry(float)
-    '''
-    return_since_entry = 0.
-
-    for b in bought_history:
-        return_since_entry += (current_adj_close - b)
-    return return_since_entry
-
-
 # In[36]:
 
 
@@ -108,52 +91,10 @@ def visualize_results(actions_history, returns_since_entry):
     ax2.scatter(sell_d, sell_p, color='red', label='sell')
     ax2.legend()
 
-def get_invested_capital(actions_history, returns_since_entry):
-    '''
-    Calculate the max capital being continously invested by the trader
-    Input:
-    actions_history(dict): has everydays' actions and close price
-    returns_since_entry(list): contains every day's return since entry
-    Output:
-    return_invest_ratio(float)
-    '''
-    invest = []
-    total = 0
-    return_invest_ratio = None
-    for i in range(len(actions_history)):
-        a = actions_history[i][2]
-        p = actions_history[i][1]
-
-        try:
-            next_a = actions_history[i+1][2]
-        except:
-            break
-        if a == 1:
-            total += p
-            if next_a != 1 or (i == len(actions_history)-2 and next_a == 1):
-                invest.append(total)
-                total = 0
-    if invest:
-        return_invest_ratio = returns_since_entry[-1]/max(invest)
-        print('invested capital {}, return/invest ratio {}'.format(max(invest),
-                                                                   return_invest_ratio))
-    else:
-        print('no buy transactions, invalid training')
-    return return_invest_ratio
 
-def get_base_return(data):
-    '''
-    Calculate the benchmark returns of a given stock
-    Input:
-    data(dataframe): containing normalized close price and state
-    Output:
-    return/invest ratio(float)
-    '''
-    start_price, _ = data[0]
-    end_price, _ = data[-1]
-    return (end_price - start_price)/start_price
+    plt.show()
 
-def train_q_learning(train_data, q, gamma, episodes,sh):
+def train_q_learning(train_data, q, cash_states_values, shares_states_values, gamma, episodes, sh):
     '''
     Train a Q-table 
     Inputs:
@@ -166,38 +107,53 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
     actions_history(dict): has everydays' actions and close price
     returns_since_entry(list): contains every day's return since entry
     '''
-    # create framework for episode-to-episode Q table change tracking; will track MSE between episodes
-    episode = 0
-    q_cur = q.copy()
-    errs = []
-    episode_decile = episodes//10
-
     # actions_history = []
     # num_shares = 0
     # bought_history = []
     # returns_since_entry = [0]
     # cash = 100000
-    cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles()
+
+    episode = 0
+    q_cur = q.copy()
+    errs = []
+    episode_decile = episodes//10
+
     alpha = 0.1
     for ii in range(episodes):
+
+        episode += 1
+        # episode_decile is 0 when episodes < 10; test it first so the modulo cannot divide by zero
+        if episode == 1 or episode == episodes or (episode_decile and episode % episode_decile == 0):
+            print('Training episode {}'.format(episode))
+
         actions_history = []
         cash = 100000
         num_shares = 0
         if ii > 1:
           alpha = alpha*0.99995
         epsilon = 0.8
-        current_portfolio_value = []      
+        current_portfolio_value = []
+
+        # episode 1 only: reset the per-step error list and snapshot q as the baseline for convergence tracking
+        if episode == 1:
+            errs_1 = []
+            q_cur_1 = q.copy()
+
+
+
         for i, val in enumerate(train_data):
             current_adj_close, state = val
             try:
                 next_adj_close, next_state = train_data[i+1]
             except:
                 break
-            
+
+
             current_cash_state = d.value_to_state(cash, cash_states_values)
             current_share_state = d.value_to_state(num_shares, shares_states_values)
             state = state + current_cash_state + current_share_state
 
+
             if i >=1:
               epsilon*= 0.9958
               
@@ -245,18 +201,16 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
             # update q table
             q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max()))
 
-            print("ARRIVED AT PORTFOLIO VAL")
             current_portfolio_value.append(cash + num_shares*next_adj_close)
 
             # ---- (tentative) start of q-table info plotting/output -----
-            q_cur_1 = q.copy()
             q_last_1 = q_cur_1.copy()
-            
-            
+            q_cur_1 = q.copy()
+
             # add convergence tracking for episode 1
             if episode == 1:
                 MSE_1 = np.sum(np.square(q_cur_1-q_last_1).values)
-                errs_1 += [MSE_1] 
+                errs_1 += [MSE_1]
 
         # add convergence tracking for episode 1
         if episode == 1:
@@ -267,31 +221,30 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
             x_axis = np.array([i+1 for i in range(len(errs_1))])
             plt.plot(x_axis,errs_1)
             plt.show()
-            
+
         # calculate MSE between epsiodes
-        q_cur = q.copy()
         q_last = q_cur.copy()
-        
-            
+        q_cur = q.copy()
+
         # update MSE tracking
         MSE = np.sum(np.square(q_cur - q_last).values)
-        
+
         # plot irregularities
         if episode > 1:
             if MSE > errs[-1]*3:
 
                 print('Episode ' + str(episode) + ' showed irregularity. MSE was ' + str(MSE) + '. Showing big 10 biggest jumps in QTable below.')
- 
+
                 q_diff = (q_cur - q_last).copy()
                 q_diff['colsum'] = q_diff.sum(axis=1)
                 q_diff = q_diff.sort_values('colsum',ascending=False).iloc[:10]
                 print(q_diff.drop(columns=['colsum']))
                 print('\n\n\n\n')
-          
+
         errs += [MSE]
-            
+
     print('End of Training!')
-    
+
     # plot MSE
     plt.figure(figsize=(14,8))
     plt.title('Q Table Stabilization By Episode',size=25)
@@ -299,7 +252,7 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
     plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14)
     x_axis = np.array([i+1 for i in range(len(errs))])
     plt.plot(x_axis,errs)
-    
+
     # plot MSE for episodes 1-10
     if len(errs) >= 10:
         # plot MSE
@@ -310,7 +263,7 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
         plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14)
         x_axis = np.array([i+1 for i in range(len(errs_new))])
         plt.plot(x_axis,errs_new)
-        
+
     # plot MSE for episodes 11-end if possible
     if len(errs) >= 10:
         # plot MSE
@@ -325,171 +278,10 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
     return q, actions_history, current_portfolio_value
 
 
-# def train_q_learning(train_data, q, alpha, epsilon, epsilon_decay, gamma, episodes,commission,sell_penalty):
-#     episode = 0
-#     '''
-#     Train a Q-table
-#     Inputs:
-#     train_data(dataframe)
-#     q(dataframe): initial Q-table
-#     epsilon(float): threshold of which action strategy to take
-#     alpha(float): proportion to weight future expected return vs. current return
-#     gamma(float): discount percentage on the future return
-#     commission(float): amount charged for stock transaction
-#     Output:
-#     q(dataframe): Updated Q-table
-#     actions_history(dict): has everydays' actions and close price
-#     returns_since_entry(list): contains every day's return since entry
-#     '''
-#     # create framework for episode-to-episode Q table change tracking; will track MSE between episodes
-#     q_cur = q.copy()
-#     errs = []
-#     episode_decile = episodes//10
-    
-#     for ii in range(episodes):
-#         episode +=1
-#         if episode == 1 or episode%episode_decile == 0 or episode == episodes:
-#             print('Training episode {}'.format(episode))
-#         actions_history = []
-#         num_shares = 0
-#         bought_history = []
-#         returns_since_entry = [0]
-#         days = [0]
-        
-#         # add convergence tracking for episode 1
-#         if episode == 1:
-#             errs_1 = []
-#             q_cur_1 = q.copy()
-        
-#         for i, val in enumerate(train_data):
-#             current_adj_close, state = val
-#             try:
-#                 next_adj_close, next_state = train_data[i+1]
-#             except:
-#                 break
-
-#             if len(bought_history) > 0:
-#                 returns_since_entry.append(get_return_since_entry(
-#                     bought_history, current_adj_close))
-#             else:
-#                 returns_since_entry.append(returns_since_entry[-1])
-
-#             # decide action
-#             '''
-#             if alpha > 0.1:
-#                 alpha = alpha/(i+1)
-#             '''
-#             epsilon*=epsilon_decay
-#             action = act(state, q, threshold=epsilon, actions_size=3)
-
-#             # get reward
-#             if action == 0:  # hold
-#                 if num_shares > 0:
-#                     prev_adj_close, _ = train_data[i-1]
-#                     future = next_adj_close - current_adj_close
-#                     past = current_adj_close - prev_adj_close
-#                     reward = past
-#                 else:
-#                     reward = 0
-
-#             if action == 1:  # buy
-#                 reward = 0-commission
-#                 num_shares += 1
-#                 bought_history.append((current_adj_close))
-
-#             if action == 2:  # sell
-#                 if num_shares > 0:
-#                     bought_price = bought_history[0]
-#                     reward = (current_adj_close - bought_price) - commission
-#                     bought_history.pop(0)
-#                     num_shares -= 1
-
-#                 else:
-#                     reward = 0 - sell_penalty
-#             actions_history.append((i, current_adj_close, action))
-
-#             # update q table
-#             q.loc[state, action] = (
-#                 1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max()))
-            
-#             # ---- (tentative) start of q-table info plotting/output -----
-#             q_last_1 = q_cur_1.copy()
-#             q_cur_1 = q.copy()
-            
-#             # add convergence tracking for episode 1
-#             if episode == 1:
-#                 MSE_1 = np.sum(np.square(q_cur_1-q_last_1).values)
-#                 errs_1 += [MSE_1]
-            
-#         # add convergence tracking for episode 1
-#         if episode == 1:
-#             plt.figure(figsize=(14,8))
-#             plt.title('Q Table Stabilization Within Episode 1',size=25)
-#             plt.xlabel('Day Number',size=20)
-#             plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14)
-#             x_axis = np.array([i+1 for i in range(len(errs_1))])
-#             plt.plot(x_axis,errs_1)
-#             plt.show()
-            
-#         # calculate MSE between epsiodes
-#         q_last = q_cur.copy()
-#         q_cur = q.copy()
-            
-#         # update MSE tracking
-#         MSE = np.sum(np.square(q_cur - q_last).values)
-        
-#         # plot irregularities
-#         if episode > 1:
-#             if MSE > errs[-1]*3:
-
-#                 print('Episode ' + str(episode) + ' showed irregularity. MSE was ' + str(MSE) + '. Showing big 10 biggest jumps in QTable below.')
- 
-#                 q_diff = (q_cur - q_last).copy()
-#                 q_diff['colsum'] = q_diff.sum(axis=1)
-#                 q_diff = q_diff.sort_values('colsum',ascending=False).iloc[:10]
-#                 print(q_diff.drop(columns=['colsum']))
-#                 print('\n\n\n\n')
-          
-#         errs += [MSE]
-            
-#     print('End of Training!')
-    
-#     # plot MSE
-#     plt.figure(figsize=(14,8))
-#     plt.title('Q Table Stabilization By Episode',size=25)
-#     plt.xlabel('Episode Number',size=20)
-#     plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14)
-#     x_axis = np.array([i+1 for i in range(len(errs))])
-#     plt.plot(x_axis,errs)
-    
-#     # plot MSE for episodes 1-10
-#     if len(errs) >= 10:
-#         # plot MSE
-#         errs_new = errs[:10]
-#         plt.figure(figsize=(14,8))
-#         plt.title('Q Table Stabilization By Episode (Episodes 1-10)',size=25)
-#         plt.xlabel('Episode Number',size=20)
-#         plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14)
-#         x_axis = np.array([i+1 for i in range(len(errs_new))])
-#         plt.plot(x_axis,errs_new)
-        
-#     # plot MSE for episodes 11-end if possible
-#     if len(errs) >= 10:
-#         # plot MSE
-#         errs_new = errs[11:]
-#         plt.figure(figsize=(14,8))
-#         plt.title('Q Table Stabilization By Episode (Episodes 11-End)',size=25)
-#         plt.xlabel('Episode Number',size=20)
-#         plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14)
-#         x_axis = np.array([i+11 for i in range(len(errs_new))])
-#         plt.plot(x_axis,errs_new)
-      
-
-#     return q, actions_history, returns_since_entry
+
 
 #def trainqlearner(start_date, end_date, ticker,alpha=0.01, epsilon=0.2, epsilon_decay = .99995, gamma=0.95, episodes=500,commission=0,sell_penalty=0):
-#def trainqlearner(train_data,start_date, end_date, ticker, q, gamma=0.95, episodes=200, sh = 50):
-def trainqlearner(start_date, end_date, ticker, gamma=0.95, episodes=200, sh = 50):
+def trainqlearner(ticker, start_date, end_date, window, gamma, episodes, sh):
     # Split the data into train and test data set
     train_df = d.get_stock_data(ticker, start_date, end_date)
 
@@ -497,42 +289,36 @@ def trainqlearner(start_date, end_date, ticker, gamma=0.95, episodes=200, sh = 5
     all_actions = {0: 'hold', 1: 'buy', 2: 'sell'}
 
     # create_df = normalized predictors norm_bb_width, norm_adj_close, norm_close_sma_ratio
-    print("START create_df")
-    train_df = d.create_df(train_df, 3)
-    print("END create_df")
+    train_df = d.create_df(train_df, window)
 
     # get_states = States Dictionary after discretizing by converting continuous values to integer state
     percent_b_states_values, close_sma_ratio_states_value = d.get_states(
         train_df)
 
+
     # Create_state_df =  Add state information to the DF
     train_df = d.create_state_df(
         train_df, percent_b_states_values, close_sma_ratio_states_value)
+
     #train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value)
 
-    # New
     cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles()
 
     # Return a list of strings representing the combination of all the states
     all_states = d.get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
     # all_states = d.get_all_states(None, percent_b_states_values, close_sma_ratio_states_value)
+
     states_size = len(all_states)
 
     # Preparation of the Q Table
-    print("START q_init")
-    q_init = initialize_q_mat(all_states, all_actions)/1e9
-    print("END q_init")
-    print(q_init)
-    
-    train_data = np.array(train_df[['norm_adj_close', 'state']])
+
+    q = initialize_q_mat(all_states, all_actions)/1e5
     
-    print("START train_q_learning")
-    q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q_init, gamma=0.95, episodes=1, sh = 50)
-    print("END train_q_learning")
+    train_data = np.array(train_df[['Adj Close', 'state']])
+
+
     
-    # Specify quantiles
-    BB_quantiles = percent_b_states_values
-    SMA_ratio_quantiles = close_sma_ratio_states_value
-    cash_quantiles, holdings_quantiles = d.create_cash_and_holdings_quantiles()
+    q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, cash_states_values, shares_states_values, gamma, episodes, sh)
+
 
-    return q, percent_b_states_values, SMA_ratio_quantiles, cash_quantiles, holdings_quantiles
+    return q, train_actions_history, train_returns_since_entry, percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values