#55 adjustment of the test and visualization
mariemayadi committed Dec 1, 2020
1 parent 33998c1 commit f3e6a5c
Showing 1 changed file with 74 additions and 56 deletions.
130 changes: 74 additions & 56 deletions QLearner_v02/stock_trader_using_q_learning_v02.py
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""Nov_Dec_v02.ipynb
"""Nov-Dec
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1n3xBQoZ2oj1l2nCwOdyMEpj3vkeE1gTH
https://colab.research.google.com/drive/1yWVJpo2nne7N2jla67bPQGOObI8sKghy
"""

import datetime
@@ -38,23 +38,20 @@ def get_stock_data(symbol, start, end, train_size=0.8):
train_df, test_df OR df(if train_size=1)
'''
df = web.DataReader(symbol, 'yahoo', start, end)

train_len = int(df.shape[0] * train_size)

if train_len > 0:
train_df = df.iloc[:train_len, :]
test_df = df.iloc[train_len:, :]
return train_df, test_df
else:
return df
return df

start = datetime.datetime(2007, 1, 1)
end = datetime.datetime(2016, 12, 31)
start_1 = datetime.datetime(2017, 1, 1)
end_1 = datetime.datetime(2019, 12, 31)

train_df, test_df = get_stock_data('JPM', start, end, 0.8)
train_df = get_stock_data('JPM', start, end, 1)
test_df = get_stock_data('JPM', start_1, end_1, 1)

train_df.head()

test_df.head()
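The frames above come from pandas-datareader's 'yahoo' source. If that endpoint is unavailable, a roughly equivalent fetch via the yfinance package could look like the following sketch (yfinance is an assumption here, not something this file uses):

import yfinance as yf

def get_stock_data_yf(symbol, start, end, train_size=0.8):
    # auto_adjust=False keeps the 'Adj Close' column the rest of the script expects.
    df = yf.download(symbol, start=start, end=end, auto_adjust=False)
    train_len = int(df.shape[0] * train_size)
    if 0 < train_len < df.shape[0]:
        # Chronological split: earlier rows for training, later rows for testing.
        return df.iloc[:train_len, :], df.iloc[train_len:, :]
    return df

With train_size=1 the sketch returns the single full frame, matching how the data is now loaded separately for 2007-2016 (training) and 2017-2019 (testing).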

all_actions = {0:'hold', 1:'buy', 2:'sell'}

# def get_bollinger_bands(values, window):
@@ -317,6 +314,8 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_s
# test_df = create_df(test_df, 3)
# test_df = create_state_df(test_df, percent_b_states_values, close_sma_ratio_states_value)

train_df

def initialize_q_mat(all_states, all_actions):
'''
Initialize Q-table
@@ -397,9 +396,9 @@ def get_return_since_entry(bought_history, current_adj_close):

train_df[['Adj Close', 'state']].head()
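The 'state' column holds the discretized market state; during training it is concatenated with a cash state and a shares state before indexing the Q-table. initialize_q_mat is collapsed in this diff, but judging from how q is used (q.loc[state, action], q.loc[next_state].max()), it builds a DataFrame with one row per combined state string and one column per action id; a minimal sketch, assuming small random initial values:

import numpy as np
import pandas as pd

def initialize_q_mat_sketch(all_states, all_actions):
    # One row per state string, one column per action id (0=hold, 1=buy, 2=sell).
    return pd.DataFrame(
        np.random.uniform(0, 0.1, size=(len(all_states), len(all_actions))),
        index=sorted(all_states),
        columns=list(all_actions.keys()),
    )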

#0.8 * (x)^506 = 0.1
0.8 * (x)^506 = 0.1
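The scratch line above solves for the per-step exploration decay: with epsilon starting at 0.8 and decayed over 506 steps, 0.8 * x**506 = 0.1 gives x of roughly 0.9959, consistent with the epsilon *= 0.9958 used inside the training loop. A quick check:

# Solve 0.8 * x**506 = 0.1 for the per-step decay factor x.
decay = (0.1 / 0.8) ** (1 / 506)
print(round(decay, 4))               # ~0.9959
print(round(0.8 * decay ** 506, 4))  # ~0.1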

def train_q_learning(train_data, q, alpha, gamma, episodes):
def train_q_learning(train_data, q, gamma, episodes):
'''
Train a Q-table
Inputs:
@@ -418,34 +417,32 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):
# returns_since_entry = [0]
# cash = 100000
alpha = 0.4

for ii in range(episodes):
actions_history = []
cash = 100000
num_shares = 0
# bought_history = []
# returns_since_entry = [0]
# days=[0]
if ii > 1:
alpha = alpha*0.985
epsilon = 0.8
current_portfolio_value = []
for i, val in enumerate(train_data):
current_adj_close, state = val
try:
next_adj_close, next_state = train_data[i+1]
except:
break


current_cash_state = value_to_state(cash, cash_states_values)
current_share_state = value_to_state(num_shares, shares_states_values)

state = state + current_cash_state + current_share_state
#print(state)


if i >=1:
epsilon*= 0.9958

action = act(state, q, threshold=epsilon, actions_size=3)

# get reward
if action == 0: # hold
if num_shares > 0:
@@ -483,22 +480,22 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):
## Note: cash and num_shares are already updated at the end of the action code block
next_state = next_state + next_cash_state + next_share_state

# #TODO
# Study

actions_history.append((i, current_adj_close, action))

# print(q.loc[state,:])


# update q table
q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max()))

# print(q.loc[state,:])
# print(state, action)
# print(q.loc[state, action])

# print("\n")

current_portfolio_value.append(cash + num_shares*next_adj_close)


print('End of Training!')
#return q, actions_history, returns_since_entry
return q, actions_history
return q, actions_history, current_portfolio_value
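Inside the loop, q.loc[state, action] is updated with the standard tabular Q-learning rule, Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a')), with alpha decayed per episode and epsilon decayed per step. A toy single-step illustration (hypothetical states, not the trading states above):

import pandas as pd

q_demo = pd.DataFrame(0.0, index=['s0', 's1'], columns=[0, 1, 2])
alpha, gamma, reward = 0.4, 0.95, 1.0
state, action, next_state = 's0', 1, 's1'

# One Q-learning update: blend the old estimate with the bootstrapped target.
q_demo.loc[state, action] = (1. - alpha) * q_demo.loc[state, action] \
    + alpha * (reward + gamma * q_demo.loc[next_state].max())
print(q_demo.loc[state, action])  # 0.4, i.e. 0.4 * (1.0 + 0.95 * 0.0)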

def visualize_results(actions_history, returns_since_entry):
'''
@@ -511,7 +508,7 @@ def visualize_results(actions_history, returns_since_entry):
Output:
None
'''
f, (ax1, ax2) = plt.subplots(2, 1, figsize=(15,12))
f, (ax1, ax2) = plt.subplots(2, 1, figsize=(30,24))

ax1.plot(returns_since_entry)

@@ -597,10 +594,10 @@ def eval_q_learning(test_data, q):
returns_since_entry(list): contains every day's return since entry
'''
actions_history = []
current_portfolio_value = []
cash = 100000
num_shares = 0
returns_since_entry = [0]
bought_history = []

act_list = []
for i, val in enumerate(test_data):
current_adj_close, state = val
try:
@@ -609,52 +606,73 @@
print('End of data! Done!')
break

if len(bought_history) > 0:
returns_since_entry.append(get_return_since_entry(bought_history, current_adj_close))
else:
returns_since_entry.append(returns_since_entry[-1])

# decide action
current_cash_state = value_to_state(cash, cash_states_values)
current_share_state = value_to_state(num_shares, shares_states_values)
state = state + current_cash_state + current_share_state


action = act(state, q, threshold=0, actions_size=3)

# get reward

if action == 1: # buy
num_shares += 1
bought_history.append((current_adj_close))
if cash > current_adj_close:
next_cash = cash - current_adj_close
num_shares += 1
cash = next_cash
else:
action = 0

if action == 2: # sell
if num_shares > 0:
bought_price = bought_history[0]
bought_history.pop(0)
next_cash = cash + current_adj_close
num_shares -= 1
cash = next_cash
else:
action = 0

act_list.append(action)

actions_history.append((i, current_adj_close, action))
#NEXT using cash and share

return actions_history, returns_since_entry
#next_cash_state = value_to_state(next_cash,cash_states_values)
## Use 'cash' instead, since the action block above has already updated it to the current value
next_cash_state = value_to_state(cash,cash_states_values)
next_share_state = value_to_state(num_shares, shares_states_values)
## Note: cash and num_shares are already updated at the end of the action code block
next_state = next_state + next_cash_state + next_share_state

type(q)
actions_history.append((i, current_adj_close, action))

current_portfolio_value.append(cash + num_shares*next_adj_close)

return actions_history, current_portfolio_value, act_list
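act itself is collapsed out of this diff; given that training calls it with threshold=epsilon and evaluation with threshold=0, it is presumably an epsilon-greedy selector along these lines (a sketch, not the committed implementation):

import random
import numpy as np

def act_sketch(state, q, threshold=0.0, actions_size=3):
    # Explore with probability `threshold`, otherwise pick the best known action.
    if random.random() < threshold:
        return random.randrange(actions_size)
    return int(np.argmax(q.loc[state].values))

With threshold=0 the evaluation pass is purely greedy with respect to the learned Q-table.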

q.div(q.sum(axis=1), axis=0)
pd.Series(train_returns_since_entry).describe()

print(q[70:90])
pd.Series(train_actions_history).value_counts()

train_data = np.array(train_df[['norm_adj_close', 'state']])
#q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)
q_mat, train_actions_history = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)
q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, gamma=0.95, episodes=200)

q_mat[:10]

visualize_results(train_actions_history, train_returns_since_entry)
get_invested_capital(train_actions_history, train_returns_since_entry)
print('base return/invest ratio {}'.format(get_base_return(train_data)))
# get_invested_capital(train_actions_history, train_returns_since_entry)
# print('base return/invest ratio {}'.format(get_base_return(train_data)))

test_df = create_df(test_df, 5)
test_df = create_state_df(test_df, percent_b_states_values , close_sma_ratio_states_value)

test_data = np.array(test_df[['norm_adj_close', 'state']])
test_actions_history, test_returns_since_entry = eval_q_learning(test_data, q)
test_actions_history, test_returns_since_entry, act_list = eval_q_learning(test_data, q)

pd.Series(test_data[:,1]).value_counts()

visualize_results(test_actions_history, test_returns_since_entry)
get_invested_capital(test_actions_history, test_returns_since_entry)
# print('invested capital {}, return/invest ratio {}'.format(invested_capital, return_invest_ratio))
print('base return/invest ratio {}'.format(get_base_return(test_data)))
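get_base_return is defined outside this hunk; as used here it presumably reports a buy-and-hold baseline over the same data, for example (a sketch under that assumption):

def get_base_return_sketch(data):
    # data rows are (norm_adj_close, state); compare the last close to the first.
    first_close = data[0][0]
    last_close = data[-1][0]
    return (last_close - first_close) / first_close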

pd.Series(test_returns_since_entry).describe()

train_return_invest_ratios = []
test_return_invest_ratios = []