From f3e6a5c6b330e0e16d98fd3c39234af60850e697 Mon Sep 17 00:00:00 2001
From: Mariem Ayadi
Date: Mon, 30 Nov 2020 20:12:51 -0500
Subject: [PATCH] #55 adjustment of the test and visualization

---
 .../stock_trader_using_q_learning_v02.py | 130 ++++++++++--------
 1 file changed, 74 insertions(+), 56 deletions(-)

diff --git a/QLearner_v02/stock_trader_using_q_learning_v02.py b/QLearner_v02/stock_trader_using_q_learning_v02.py
index 5c9327c..f9de7a6 100644
--- a/QLearner_v02/stock_trader_using_q_learning_v02.py
+++ b/QLearner_v02/stock_trader_using_q_learning_v02.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
-"""Nov_Dec_v02.ipynb
+"""Nov-Dec
 
 Automatically generated by Colaboratory.
 
 Original file is located at
-    https://colab.research.google.com/drive/1n3xBQoZ2oj1l2nCwOdyMEpj3vkeE1gTH
+    https://colab.research.google.com/drive/1yWVJpo2nne7N2jla67bPQGOObI8sKghy
 """
 
 import datetime
@@ -38,23 +38,20 @@ def get_stock_data(symbol, start, end, train_size=0.8):
         train_df, test_df OR df(if train_size=1)
     '''
     df = web.DataReader(symbol, 'yahoo', start, end)
-
-    train_len = int(df.shape[0] * train_size)
-
-    if train_len > 0:
-        train_df = df.iloc[:train_len, :]
-        test_df = df.iloc[train_len:, :]
-        return train_df, test_df
-    else:
-        return df
+    return df
 
 start = datetime.datetime(2007, 1, 1)
 end = datetime.datetime(2016, 12, 31)
+start_1 = datetime.datetime(2017, 1, 1)
+end_1 = datetime.datetime(2019, 12, 31)
 
-train_df, test_df = get_stock_data('JPM', start, end, 0.8)
+train_df = get_stock_data('JPM', start, end, 1)
+test_df = get_stock_data('JPM', start_1, end_1, 1)
 
 train_df.head()
 
+test_df.head()
+
 all_actions = {0:'hold', 1:'buy', 2:'sell'}
 
 # def get_bollinger_bands(values, window):
@@ -317,6 +314,8 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_s
 # test_df = create_df(test_df, 3)
 # test_df = create_state_df(test_df, percent_b_states_values, close_sma_ratio_states_value)
 
+train_df
+
 def initialize_q_mat(all_states, all_actions):
     '''
     Initialize Q-table
@@ -397,9 +396,9 @@ def get_return_since_entry(bought_history, current_adj_close):
 
 train_df[['Adj Close', 'state']].head()
 
-#0.8 * (x)^506 = 0.1
+# 0.8 * (x)^506 = 0.1
 
-def train_q_learning(train_data, q, alpha, gamma, episodes):
+def train_q_learning(train_data, q, gamma, episodes):
     '''
     Train a Q-table
     Inputs:
@@ -418,15 +417,14 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):
     # returns_since_entry = [0]
     # cash = 100000
     alpha = 0.4
-
     for ii in range(episodes):
         actions_history = []
         cash = 100000
         num_shares = 0
-        # bought_history = []
-        # returns_since_entry = [0]
-        # days=[0]
+        if ii > 1:
+            alpha = alpha*0.985
         epsilon = 0.8
+        current_portfolio_value = []
         for i, val in enumerate(train_data):
             current_adj_close, state = val
             try:
@@ -434,18 +432,17 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):
             except:
                 break
 
+
             current_cash_state = value_to_state(cash, cash_states_values)
             current_share_state = value_to_state(num_shares, shares_states_values)
-
             state = state + current_cash_state + current_share_state
-            #print(state)
-
+
             if i >=1:
                 epsilon*= 0.9958
             action = act(state, q, threshold=epsilon, actions_size=3)
-
+
             # get reward
             if action == 0: # hold
                 if num_shares > 0:
@@ -483,22 +480,22 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):
             ## Note: cash and num_shares are automatically updated at the end of the Action code block
             next_state = next_state + next_cash_state + next_share_state
 
+            # #TODO
+            # Study
+
             actions_history.append((i, current_adj_close, action))
 
-            # print(q.loc[state,:])
+            # update q table
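+            # Tabular Q-learning update: move Q(s, a) toward the observed reward
+            # plus the discounted value of the best action in the next state:
+            #   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a'))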
             q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max()))
-
-            # print(q.loc[state,:])
-            # print(state, action)
-            # print(q.loc[state, action])
-            # print("\n")
+
+            current_portfolio_value.append(cash + num_shares*next_adj_close)
+
     print('End of Training!')
-    #return q, actions_history, returns_since_entry
-    return q, actions_history
+    return q, actions_history, current_portfolio_value
 
 def visualize_results(actions_history, returns_since_entry):
     '''
@@ -511,7 +508,7 @@ def visualize_results(actions_history, returns_since_entry):
     Output:
     None
     '''
-    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(15,12))
+    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(30,24))
 
     ax1.plot(returns_since_entry)
 
@@ -597,10 +594,10 @@ def eval_q_learning(test_data, q):
     returns_since_entry(list): contains every day's return since entry
     '''
     actions_history = []
+    current_portfolio_value = []
+    cash = 100000
     num_shares = 0
-    returns_since_entry = [0]
-    bought_history = []
-
+    act_list = []
     for i, val in enumerate(test_data):
         current_adj_close, state = val
         try:
@@ -609,52 +606,73 @@ def eval_q_learning(test_data, q):
             print('End of data! Done!')
             break
 
-        if len(bought_history) > 0:
-            returns_since_entry.append(get_return_since_entry(bought_history, current_adj_close))
-        else:
-            returns_since_entry.append(returns_since_entry[-1])
-
         # decide action
+        current_cash_state = value_to_state(cash, cash_states_values)
+        current_share_state = value_to_state(num_shares, shares_states_values)
+        state = state + current_cash_state + current_share_state
+
+        action = act(state, q, threshold=0, actions_size=3)
+
+        # get reward
         if action == 1: # buy
-            num_shares += 1
-            bought_history.append((current_adj_close))
+            if cash > current_adj_close:
+                next_cash = cash - current_adj_close
+                num_shares += 1
+                cash = next_cash
+            else:
+                action = 0
+
         if action == 2: # sell
             if num_shares > 0:
-                bought_price = bought_history[0]
-                bought_history.pop(0)
+                next_cash = cash + current_adj_close
                 num_shares -= 1
+                cash = next_cash
+            else:
+                action = 0
+
+        act_list.append(action)
 
-        actions_history.append((i, current_adj_close, action))
+        # Next state: built from the updated cash and share holdings
 
-    return actions_history, returns_since_entry
+        #next_cash_state = value_to_state(next_cash,cash_states_values)
+        ## Use 'cash' instead, as it already reflects the post-action value
+        next_cash_state = value_to_state(cash,cash_states_values)
+        next_share_state = value_to_state(num_shares, shares_states_values)
+        ## Note: cash and num_shares are automatically updated at the end of the Action code block
+        next_state = next_state + next_cash_state + next_share_state
 
-type(q)
+        actions_history.append((i, current_adj_close, action))
+
+        current_portfolio_value.append(cash + num_shares*next_adj_close)
+
+    return actions_history, current_portfolio_value, act_list
 
-q.div(q.sum(axis=1), axis=0)
+pd.Series(train_returns_since_entry).describe()
 
-print(q[70:90])
+pd.Series(train_actions_history).value_counts()
 
 train_data = np.array(train_df[['norm_adj_close', 'state']])
 
-#q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)
-q_mat, train_actions_history = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)
+q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, gamma=0.95, episodes=200)
 
 q_mat[:10]
 
 visualize_results(train_actions_history, train_returns_since_entry)
 
-get_invested_capital(train_actions_history, train_returns_since_entry)
-print('base return/invest ratio {}'.format(get_base_return(train_data)))
+# get_invested_capital(train_actions_history, train_returns_since_entry)
+# print('base return/invest ratio {}'.format(get_base_return(train_data)))
+
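+# The test frame is a separate 2017-2019 download, so build its features and
+# discretized states (create_df / create_state_df) before evaluating the Q-table: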
+test_df = create_df(test_df, 5)
+test_df = create_state_df(test_df, percent_b_states_values, close_sma_ratio_states_value)
 
 test_data = np.array(test_df[['norm_adj_close', 'state']])
 
-test_actions_history, test_returns_since_entry = eval_q_learning(test_data, q)
+test_actions_history, test_returns_since_entry, act_list = eval_q_learning(test_data, q)
 
 pd.Series(test_data[:,1]).value_counts()
 
 visualize_results(test_actions_history, test_returns_since_entry)
 
-get_invested_capital(test_actions_history, test_returns_since_entry)
-# print('invested capital {}, return/invest ratio {}'.format(invested_capital, return_invest_ratio))
-print('base return/invest ratio {}'.format(get_base_return(test_data)))
+
+pd.Series(test_returns_since_entry).describe()
 
 train_return_invest_ratios = []
 test_return_invest_ratios = []