#55 new state dependency updates within training
mariemayadi committed Nov 30, 2020
1 parent b16f452 commit 33998c1
Showing 1 changed file with 28 additions and 15 deletions.
43 changes: 28 additions & 15 deletions QLearner_v02/stock_trader_using_q_learning_v02.py
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""Nov_Dec.ipynb
"""Nov_Dec_v02.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1R9IfXSRHoNrhqZM4i5jO_V4lJXot6quc
https://colab.research.google.com/drive/1n3xBQoZ2oj1l2nCwOdyMEpj3vkeE1gTH
"""

import datetime
@@ -397,7 +397,7 @@ def get_return_since_entry(bought_history, current_adj_close):

train_df[['Adj Close', 'state']].head()

0.8 * (x)^506 = 0.1
#0.8 * (x)^506 = 0.1
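The commented-out line above records the epsilon-decay calibration: starting from epsilon = 0.8, find the per-step factor x that brings it down to 0.1 after 506 steps (506 is taken from the comment and is presumably the number of training steps). A quick check of the arithmetic in plain Python:

    # Solve 0.8 * x**506 = 0.1 for the per-step decay factor x
    decay = (0.1 / 0.8) ** (1.0 / 506)
    print(round(decay, 5))   # 0.9959, matching the epsilon *= 0.9958 applied inside the training loop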

def train_q_learning(train_data, q, alpha, gamma, episodes):
'''
@@ -436,41 +436,52 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):

current_cash_state = value_to_state(cash, cash_states_values)
current_share_state = value_to_state(num_shares, shares_states_values)
#TODO
#NEXT using cash and share

state = state + current_cash_state + current_share_state
print(state)
#print(state)


if i >=1:
epsilon*= 0.9958

action = act(state, q, threshold=epsilon, actions_size=3)




# get reward
if action == 0: # hold
if num_shares > 0:
next_cash = cash # no change
reward = (cash + num_shares*next_adj_close) - (cash + num_shares*current_adj_close)
else:
reward = 0

if action == 1: # buy
if cash > current_adj_close:
reward = (cash - current_adj_close + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close)
next_cash = cash - current_adj_close
# reward = (cash - current_adj_close + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close)
reward = (next_cash + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close)
num_shares += 1
cash = next_cash
else:
reward = 0

if action == 2: # sell
if num_shares > 0:
reward = (cash + current_adj_close + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close)
next_cash = cash + current_adj_close
# reward = (cash + current_adj_close + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close)
reward = (next_cash + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close)
num_shares -= 1
cash = next_cash
else:
reward = 0
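In each action branch the reward is the post-action portfolio value at the next close (next_cash plus the updated share count marked to market) minus the pre-action portfolio value at the current close, with the traded share settled at the current price. A small numeric check of the buy branch, using illustrative prices that are not from the dataset:

    cash, num_shares = 100.0, 2
    current_adj_close, next_adj_close = 10.0, 11.0

    next_cash = cash - current_adj_close                           # 90.0 left after buying one share
    reward = (next_cash + (num_shares + 1) * next_adj_close) \
             - (cash + num_shares * current_adj_close)             # (90 + 33) - (100 + 20) = 3.0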
# #TODO
# Study

#NEXT using cash and share

#next_cash_state = value_to_state(next_cash,cash_states_values)
## Use 'cash' instead, since the action block above has already set cash = next_cash
next_cash_state = value_to_state(cash,cash_states_values)
next_share_state = value_to_state(num_shares, shares_states_values)
## Note: cash and num_shares are already updated at the end of the action code block above
next_state = next_state + next_cash_state + next_share_state
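This is the core of the commit: both the current state (above) and the next state are now extended with discretized cash and share-holding buckets, so the Q-table can condition on portfolio composition rather than on price alone. A rough, hypothetical sketch of the idea only (this value_to_state is a stand-in, not the repository's actual implementation, and the bin edges are made up):

    def value_to_state(value, bin_edges):
        # Stand-in: return the label of the first bucket whose upper edge the value does not exceed.
        for i, edge in enumerate(bin_edges):
            if value <= edge:
                return str(i)
        return str(len(bin_edges))

    price_state = '3'                                               # discretized normalized price
    composite = price_state + value_to_state(950, [500, 1000, 2000]) \
                            + value_to_state(2, [0, 5, 10])
    print(composite)   # '311': one key per (price bucket, cash bucket, share bucket) combination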

actions_history.append((i, current_adj_close, action))

@@ -486,7 +497,8 @@ def train_q_learning(train_data, q, alpha, gamma, episodes):
# print("\n")

print('End of Training!')
return q, actions_history, returns_since_entry
#return q, actions_history, returns_since_entry
return q, actions_history

def visualize_results(actions_history, returns_since_entry):
'''
@@ -625,7 +637,8 @@ def eval_q_learning(test_data, q):
print(q[70:90])

train_data = np.array(train_df[['norm_adj_close', 'state']])
q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)
#q_mat, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)
q_mat, train_actions_history = train_q_learning(train_data, q, alpha=0.8, gamma=0.95, episodes=1)

q_mat[:10]

