#55 integration code fixes pre-testing with sim error based on 2 states vs 4 states ingestion
mariemayadi committed Dec 6, 2020
1 parent 34d0626 commit e1aae30
Showing 4 changed files with 2,650 additions and 700 deletions.
6 changes: 4 additions & 2 deletions src/data_process.py
@@ -168,7 +168,7 @@ def create_cash_and_holdings_quantiles():

cash_states_values = {}
for i in range(len(cash_list)):
-cash_states_values[i] = cash_list[i]
+cash_states_values[i] = cash_list[i]
cash_states_values[9] = float("inf")

# HOLDINGS = Num Shares (State 4)
@@ -177,7 +177,7 @@ def create_cash_and_holdings_quantiles():

shares_states_values = {}
for i in range(len(shares_list)):
-shares_states_values[i] = shares_list[i]
+shares_states_values[i] = shares_list[i]
shares_states_values[9] = float("inf")

return cash_states_values, shares_states_values
@@ -271,6 +271,8 @@ def create_state_df(df, bb_states_value, close_sma_ratio_states_value):
Output:
df(dataframe)
'''
+percent_b_states_values, close_sma_ratio_states_value = get_states(df)
+
#df['norm_bb_width_state'] = df['norm_bb_width'].apply(lambda x : value_to_state(x, bb_states_value)) #2
df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(lambda x : value_to_state(x, close_sma_ratio_states_value))
df['percent_b_state'] = df['percent_b'].apply(lambda x : value_to_state(x, percent_b_states_values))
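Both hunks above funnel through value_to_state: each dictionary maps a state index to an upper quantile boundary, with float("inf") closing the top bucket. A minimal sketch of the assumed mapping follows (not the repository's code; the boundary values are made up for illustration):

```python
# Sketch of the assumed behavior of value_to_state: return the index of the
# first bucket whose upper boundary the value does not exceed, as a string,
# since state indices are later concatenated into Q-table keys.
def value_to_state(value, states_values):
    for state, boundary in states_values.items():
        if value <= boundary:
            return str(state)
    return str(len(states_values) - 1)  # unreachable while the top boundary is inf

# Hypothetical cash boundaries, shaped like the dict built above:
cash_states_values = {0: 1e3, 1: 5e3, 2: 1e4, 3: 5e4, 9: float("inf")}
print(value_to_state(3200.0, cash_states_values))  # -> '1'
print(value_to_state(9e9, cash_states_values))     # -> '9'
```

Note that the get_states(df) call added to create_state_df recomputes the quantile dictionaries locally, shadowing the ones passed in as arguments.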
51 changes: 25 additions & 26 deletions src/sim.py
@@ -23,14 +23,12 @@
start_date = dt.datetime(2007, 1, 1)
end_date = dt.datetime(2016, 12, 31)

-q, bb_states_value, SMA_ratio_quantiles, MRDR_values = tu.trainqlearner(start_date, end_date, ticker)
+print("START")
+q, bb_states_value, SMA_ratio_quantiles, cash_quantiles, holdings_quantiles = tu.trainqlearner(start_date, end_date, ticker)
+print("END")
q.columns = ['HOLD', 'BUY', 'SELL']
bb_ = list(bb_states_value.values())
-
-print(bb_)
-
sma_ = list(SMA_ratio_quantiles.values())
-mrdr_ = list(MRDR_values.values())

# Fixing the range problem

@@ -500,7 +498,7 @@ def ols(stock_table,money,inc, original_shares,commission):
return results

# def qlearner(stock_table,money,inc, original_shares,qtable=ql[0], BB_quantiles=ql[1], SMA_quantiles=ql[2],window=window):
-def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, MRDR_quantiles=mrdr_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
+def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
'''
Enacts qlearning
@@ -592,16 +590,17 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
smq = len(SMA_quantiles) - 1

# find current SMA value
-mra = d.get_mrdr(stock_table.iloc[:i],baseline).iloc[-1]
+#mra = d.get_mrdr(stock_table.iloc[:i],baseline).iloc[-1]

# find current SMA quantile
-if mra != float('inf'):
-mrq = np.argwhere(np.where(MRDR_quantiles>mra,1,0))[0][0]
-else:
-mrq = len(MRDR_quantiles) - 1
+# if mra != float('inf'):
+# mrq = np.argwhere(np.where(MRDR_quantiles>mra,1,0))[0][0]
+# else:
+# mrq = len(MRDR_quantiles) - 1

# find state based on these two pieces of information
-state = str(smq) + str(bbq) + str(mrq)
+#state = str(smq) + str(bbq) + str(mrq)
+state = str(smq) + str(bbq)

# locate *optimal* action from Q table, which we will then examine to see if it's possible
# print("STATE: ", state, str(bbq), str(smq))
@@ -663,7 +662,7 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu

actions = pd.Series(actions,index=stock_table.index)

-results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':qtable, 'state_history':pd.Series(state_history),'BB_quantiles':BB_quantiles,'SMA_quantiles':SMA_quantiles,'MRDR_quantiles':MRDR_quantiles, 'markov':markov}
+results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':qtable, 'state_history':pd.Series(state_history),'BB_quantiles':BB_quantiles,'SMA_quantiles':SMA_quantiles, 'markov':markov}
return results

# function to return stats and graphs
@@ -757,19 +756,19 @@ def return_stats(stock='aapl',

# marginalize over MRDR
# TODO - determine if this mean was taken correctly
-qtab_mrdr = weighted_average_and_normalize(qtab, state_history, 2, quantile_length)
-qtab_mrdr = qtab_mrdr.iloc[::-1]
-qtab_mrdr.index = np.round(np.flip(np.array(results[policy.__name__]['MRDR_quantiles'])),5)
+# qtab_mrdr = weighted_average_and_normalize(qtab, state_history, 2, quantile_length)
+# qtab_mrdr = qtab_mrdr.iloc[::-1]
+# qtab_mrdr.index = np.round(np.flip(np.array(results[policy.__name__]['MRDR_quantiles'])),5)

-plt.figure(figsize=(9,7))
-fig = heatmap(qtab_mrdr,cmap='Blues')
-plt.title('Market Relative Daily Return Q-Table',size=16)
-plt.gca().hlines([i+1 for i in range(len(qtab_mrdr.index))],xmin=0,xmax=10,linewidth=10,color='white')
-plt.xticks(fontsize=15)
-plt.yticks(fontsize=14,rotation=0)
-plt.gca().tick_params(axis='x',bottom=False,left=False)
-plt.gca().tick_params(axis='y',bottom=False,left=False)
-plt.show(fig)
+# plt.figure(figsize=(9,7))
+# fig = heatmap(qtab_mrdr,cmap='Blues')
+# plt.title('Market Relative Daily Return Q-Table',size=16)
+# plt.gca().hlines([i+1 for i in range(len(qtab_mrdr.index))],xmin=0,xmax=10,linewidth=10,color='white')
+# plt.xticks(fontsize=15)
+# plt.yticks(fontsize=14,rotation=0)
+# plt.gca().tick_params(axis='x',bottom=False,left=False)
+# plt.gca().tick_params(axis='y',bottom=False,left=False)
+# plt.show(fig)


# get markov transition models
@@ -829,7 +828,7 @@ def return_stats(stock='aapl',
try:
del dic['BB_quantiles']
del dic['SMA_quantiles']
-del dic['MRDR_quantiles']
+# del dic['MRDR_quantiles']
except:
pass
df = pd.DataFrame(dic)
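The net effect of the sim.py changes: MRDR is dropped from the state, so qlearner now builds a two-digit key (SMA-ratio bucket plus %B bucket), while the retrained Q table, as the trainqlearner_util.py diff below shows, appends cash and holdings buckets to form four-digit keys. That mismatch is the "2 states vs 4 states" ingestion error named in the commit message. A hedged sketch of the lookup and key construction (assumed behavior, with made-up boundary values):

```python
import numpy as np

# Mirrors the np.argwhere/np.where idiom in the hunk above: index of the
# first quantile boundary strictly greater than the value, else the top bucket.
def quantile_index(value, quantiles):
    hits = np.argwhere(np.where(np.array(quantiles) > value, 1, 0))
    return hits[0][0] if len(hits) else len(quantiles) - 1

sma_ = [0.98, 1.00, 1.05, float("inf")]  # illustrative boundaries only
bb_ = [0.20, 0.50, 0.80, float("inf")]

smq = quantile_index(1.02, sma_)  # -> 2
bbq = quantile_index(0.70, bb_)   # -> 2
state = str(smq) + str(bbq)       # '22': a two-digit key...

# ...but a Q table trained on indicator buckets plus cash and holdings
# buckets is indexed by four-digit keys such as '2203', so this two-digit
# lookup cannot find a matching row.
```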
44 changes: 32 additions & 12 deletions src/trainqlearner_util.py
@@ -166,11 +166,18 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
actions_history(dict): has everydays' actions and close price
returns_since_entry(list): contains every day's return since entry
'''
# create framework for episode-to-episode Q table change tracking; will track MSE between episodes
episode = 0
q_cur = q.copy()
errs = []
episode_decile = episodes//10
+
+# actions_history = []
+# num_shares = 0
+# bought_history = []
+# returns_since_entry = [0]
+# cash = 100000
+cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles()
alpha = 0.1
for ii in range(episodes):
actions_history = []
@@ -186,13 +193,11 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
next_adj_close, next_state = train_data[i+1]
except:
break
-
-
-current_cash_state = value_to_state(cash, cash_states_values)
-current_share_state = value_to_state(num_shares, shares_states_values)
+
+current_cash_state = d.value_to_state(cash, cash_states_values)
+current_share_state = d.value_to_state(num_shares, shares_states_values)
state = state + current_cash_state + current_share_state
-

if i >=1:
epsilon*= 0.9958

@@ -230,8 +235,8 @@ def train_q_learning(train_data, q, gamma, episodes,sh):

#next_cash_state = value_to_state(next_cash,cash_states_values)
## Use 'cash' instead as affect 'current'
-next_cash_state = value_to_state(cash,cash_states_values)
-next_share_state = value_to_state(num_shares, shares_states_values)
+next_cash_state = d.value_to_state(cash,cash_states_values)
+next_share_state = d.value_to_state(num_shares, shares_states_values)
## Note: cash and num_share are automatically updated in at the end of the Action code block
next_state = next_state + next_cash_state + next_share_state

@@ -240,11 +245,13 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
# update q table
q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max()))

print("ARRIVED AT PORTFOLIO VAL")
current_portfolio_value.append(cash + num_shares*next_adj_close)

# ---- (tentative) start of q-table info plotting/output -----
-q_last_1 = q_cur_1.copy()
q_cur_1 = q.copy()
+q_last_1 = q_cur_1.copy()
+

# add convergence tracking for episode 1
if episode == 1:
@@ -262,8 +269,9 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
plt.show()

# calculate MSE between epsiodes
-q_last = q_cur.copy()
q_cur = q.copy()
+q_last = q_cur.copy()
+

# update MSE tracking
MSE = np.sum(np.square(q_cur - q_last).values)
@@ -480,15 +488,18 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
# return q, actions_history, returns_since_entry

#def trainqlearner(start_date, end_date, ticker,alpha=0.01, epsilon=0.2, epsilon_decay = .99995, gamma=0.95, episodes=500,commission=0,sell_penalty=0):
-def trainqlearner(train_data, q, gamma=0.95, episodes=200, sh = 50)
+#def trainqlearner(train_data,start_date, end_date, ticker, q, gamma=0.95, episodes=200, sh = 50):
+def trainqlearner(start_date, end_date, ticker, gamma=0.95, episodes=200, sh = 50):
# Split the data into train and test data set
train_df = d.get_stock_data(ticker, start_date, end_date)

# Action Definition (= Q table columns)
all_actions = {0: 'hold', 1: 'buy', 2: 'sell'}

# create_df = normalized predictors norm_bb_width, norm_adj_close, norm_close_sma_ratio
print("START create_df")
train_df = d.create_df(train_df, 3)
print("END create_df")

# get_states = States Dictionary after discretizing by converting continuous values to integer state
percent_b_states_values, close_sma_ratio_states_value = d.get_states(
@@ -499,17 +510,26 @@ def trainqlearner(train_data, q, gamma=0.95, episodes=200, sh = 50)
train_df, percent_b_states_values, close_sma_ratio_states_value)
#train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value)

+# New
+cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles()
+
# Return a list of strings representing the combination of all the states
-all_states = get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
+all_states = d.get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
+# all_states = d.get_all_states(None, percent_b_states_values, close_sma_ratio_states_value)
states_size = len(all_states)

# Preparation of the Q Table
print("START q_init")
q_init = initialize_q_mat(all_states, all_actions)/1e9
print("END q_init")
print(q_init)

train_data = np.array(train_df[['norm_adj_close', 'state']])

-q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, gamma=0.95, episodes=200, sh = 50)
+print("START train_q_learning")
+q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q_init, gamma=0.95, episodes=1, sh = 50)
+print("END train_q_learning")

# Specify quantiles
BB_quantiles = percent_b_states_values
SMA_ratio_quantiles = close_sma_ratio_states_value
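The update applied in train_q_learning is the standard tabular Q-learning rule, Q(s,a) <- (1 - alpha)*Q(s,a) + alpha*(r + gamma*max_a' Q(s',a')), with between-episode convergence tracked as the summed squared difference of successive Q tables. A small self-contained sketch of both pieces (illustrative states and reward, not the repository's data):

```python
import numpy as np
import pandas as pd

alpha, gamma = 0.1, 0.95  # alpha matches the value set in the first hunk

# Tiny illustrative Q table: four-digit state keys, three actions.
q = pd.DataFrame(0.0, index=["0000", "0103"], columns=["hold", "buy", "sell"])

# One update of Q(s,a) <- (1 - alpha)*Q(s,a) + alpha*(r + gamma*max Q(s',:)),
# the same expression as the q.loc[state, action] line in the diff.
state, action, reward, next_state = "0000", "buy", 1.5, "0103"
q.loc[state, action] = (1. - alpha) * q.loc[state, action] + \
    alpha * (reward + gamma * q.loc[next_state].max())
# q.loc["0000", "buy"] is now 0.1 * (1.5 + 0.95 * 0.0) = 0.15

# Episode-to-episode convergence check, as in the diff: snapshot the table,
# run another episode of updates, then compare.
q_last = q.copy()
# ... one more episode of updates would modify q here ...
MSE = np.sum(np.square(q - q_last).values)  # 0.0 with no further updates
```

With episodes=1 hard-coded in the train_q_learning call above, this MSE tracking has nothing to compare across episodes; presumably a temporary setting while debugging the state-ingestion error.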