diff --git a/QLearner_v02/stock_trader_using_q_learning_v02.py b/QLearner_v02/stock_trader_using_q_learning_v02.py index 5c9327c..f9de7a6 100644 --- a/QLearner_v02/stock_trader_using_q_learning_v02.py +++ b/QLearner_v02/stock_trader_using_q_learning_v02.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- -"""Nov_Dec_v02.ipynb +"""Nov-Dec Automatically generated by Colaboratory. Original file is located at - https://colab.research.google.com/drive/1n3xBQoZ2oj1l2nCwOdyMEpj3vkeE1gTH + https://colab.research.google.com/drive/1yWVJpo2nne7N2jla67bPQGOObI8sKghy """ import datetime @@ -38,23 +38,20 @@ def get_stock_data(symbol, start, end, train_size=0.8): train_df, test_df OR df(if train_size=1) ''' df = web.DataReader(symbol, 'yahoo', start, end) - - train_len = int(df.shape[0] * train_size) - - if train_len > 0: - train_df = df.iloc[:train_len, :] - test_df = df.iloc[train_len:, :] - return train_df, test_df - else: - return df + return df start = datetime.datetime(2007, 1, 1) end = datetime.datetime(2016, 12, 31) +start_1 = datetime.datetime(2017, 1, 1) +end_1 = datetime.datetime(2019, 12, 31) -train_df, test_df = get_stock_data('JPM', start, end, 0.8) +train_df = get_stock_data('JPM', start, end, 1) +test_df = get_stock_data('JPM', start_1, end_1, 1) train_df.head() +test_df.head() + all_actions = {0:'hold', 1:'buy', 2:'sell'} # def get_bollinger_bands(values, window): @@ -317,6 +314,8 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_s # test_df = create_df(test_df, 3) # test_df = create_state_df(test_df, percent_b_states_values, close_sma_ratio_states_value) +train_df + def initialize_q_mat(all_states, all_actions): ''' Initialize Q-table @@ -397,9 +396,9 @@ def get_return_since_entry(bought_history, current_adj_close): train_df[['Adj Close', 'state']].head() -#0.8 * (x)^506 = 0.1 +# 0.8 * (x)^506 = 0.1 -def train_q_learning(train_data, q, alpha, gamma, episodes): +def train_q_learning(train_data, q, gamma, episodes): ''' Train a Q-table 
Inputs: @@ -418,15 +417,14 @@ def train_q_learning(train_data, q, alpha, gamma, episodes): # returns_since_entry = [0] # cash = 100000 alpha = 0.4 - for ii in range(episodes): actions_history = [] cash = 100000 num_shares = 0 - # bought_history = [] - # returns_since_entry = [0] - # days=[0] + if ii > 1: + alpha = alpha*0.985 epsilon = 0.8 + current_portfolio_value = [] for i, val in enumerate(train_data): current_adj_close, state = val try: @@ -434,18 +432,17 @@ def train_q_learning(train_data, q, alpha, gamma, episodes): except: break + current_cash_state = value_to_state(cash, cash_states_values) current_share_state = value_to_state(num_shares, shares_states_values) - state = state + current_cash_state + current_share_state - #print(state) - + if i >=1: epsilon*= 0.9958 action = act(state, q, threshold=epsilon, actions_size=3) - + # get reward if action == 0: # hold if num_shares > 0: @@ -483,22 +480,22 @@ def train_q_learning(train_data, q, alpha, gamma, episodes): ## Note: cash and num_share are automatically updated in at the end of the Action code block next_state = next_state + next_cash_state + next_share_state + # #TODO + # Study + actions_history.append((i, current_adj_close, action)) - # print(q.loc[state,:]) + # update q table q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max())) - - # print(q.loc[state,:]) - # print(state, action) - # print(q.loc[state, action]) - # print("\n") + + current_portfolio_value.append(cash + num_shares*next_adj_close) + print('End of Training!') - #return q, actions_history, returns_since_entry - return q, actions_history + return q, actions_history, current_portfolio_value def visualize_results(actions_history, returns_since_entry): ''' @@ -511,7 +508,7 @@ def visualize_results(actions_history, returns_since_entry): Output: None ''' - f, (ax1, ax2) = plt.subplots(2, 1, figsize=(15,12)) + f, (ax1, ax2) = plt.subplots(2, 1, figsize=(30,24)) ax1.plot(returns_since_entry) @@ 
-597,10 +594,10 @@ def eval_q_learning(test_data, q): returns_since_entry(list): contains every day's return since entry ''' actions_history = [] + current_portfolio_value = [] + cash = 100000 num_shares = 0 - returns_since_entry = [0] - bought_history = [] - + act_list = [] for i, val in enumerate(test_data): current_adj_close, state = val try: @@ -609,52 +606,73 @@ def eval_q_learning(test_data, q): print('End of data! Done!') break - if len(bought_history) > 0: - returns_since_entry.append(get_return_since_entry(bought_history, current_adj_close)) - else: - returns_since_entry.append(returns_since_entry[-1]) - # decide action + current_cash_state = value_to_state(cash, cash_states_values) + current_share_state = value_to_state(num_shares, shares_states_values) + state = state + current_cash_state + current_share_state + + action = act(state, q, threshold=0, actions_size=3) + + # get reward if action == 1: # buy - num_shares += 1 - bought_history.append((current_adj_close)) + if cash > current_adj_close: + next_cash = cash - current_adj_close + num_shares += 1 + cash = next_cash + else: + action = 0 + if action == 2: # sell if num_shares > 0: - bought_price = bought_history[0] - bought_history.pop(0) + next_cash = cash + current_adj_close num_shares -= 1 + cash = next_cash + else: + action = 0 + + act_list.append(action) - actions_history.append((i, current_adj_close, action)) + #NEXT using cash and share - return actions_history, returns_since_entry + #next_cash_state = value_to_state(next_cash,cash_states_values) + ## Use 'cash' instead as affect 'current' + next_cash_state = value_to_state(cash,cash_states_values) + next_share_state = value_to_state(num_shares, shares_states_values) + ## Note: cash and num_share are automatically updated in at the end of the Action code block + next_state = next_state + next_cash_state + next_share_state -type(q) + actions_history.append((i, current_adj_close, action)) + + current_portfolio_value.append(cash + 
num_shares*next_adj_close) + + return actions_history, current_portfolio_value, act_list -q.div(q.sum(axis=1), axis=0) +pd.Series(train_returns_since_entry).describe() -print(q[70:90]) +pd.Series(train_actions_history).value_counts() train_data = np.array(train_df[['norm_adj_close', 'state']]) -#q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1) -q_mat, train_actions_history = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1) +q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, gamma=0.95, episodes=200) q_mat[:10] visualize_results(train_actions_history, train_returns_since_entry) -get_invested_capital(train_actions_history, train_returns_since_entry) -print('base return/invest ratio {}'.format(get_base_return(train_data))) +# get_invested_capital(train_actions_history, train_returns_since_entry) +# print('base return/invest ratio {}'.format(get_base_return(train_data))) + +test_df = create_df(test_df, 5) +test_df = create_state_df(test_df, percent_b_states_values , close_sma_ratio_states_value) test_data = np.array(test_df[['norm_adj_close', 'state']]) -test_actions_history, test_returns_since_entry = eval_q_learning(test_data, q) +test_actions_history, test_returns_since_entry, act_list = eval_q_learning(test_data, q) pd.Series(test_data[:,1]).value_counts() visualize_results(test_actions_history, test_returns_since_entry) -get_invested_capital(test_actions_history, test_returns_since_entry) -# print('invested capital {}, return/invest ratio {}'.format(invested_capital, return_invest_ratio)) -print('base return/invest ratio {}'.format(get_base_return(test_data))) + +pd.Series(test_returns_since_entry).describe() train_return_invest_ratios = [] test_return_invest_ratios = []