#55 integration code fixes pre-testing with sim error based on 2 states vs 4 states ingestion
mariemayadi committed Dec 6, 2020
1 parent 34d0626 commit e1aae30
Showing 4 changed files with 2,650 additions and 700 deletions.
6 changes: 4 additions & 2 deletions src/data_process.py
@@ -168,7 +168,7 @@ def create_cash_and_holdings_quantiles():

cash_states_values = {}
for i in range(len(cash_list)):
-cash_states_values[i] = cash_list[i]
+cash_states_values[i] = cash_list[i]
cash_states_values[9] = float("inf")

# HOLDINGS = Num Shares (State 4)
@@ -177,7 +177,7 @@ def create_cash_and_holdings_quantiles():

shares_states_values = {}
for i in range(len(shares_list)):
-shares_states_values[i] = shares_list[i]
+shares_states_values[i] = shares_list[i]
shares_states_values[9] = float("inf")

return cash_states_values, shares_states_values
@@ -271,6 +271,8 @@ def create_state_df(df, bb_states_value, close_sma_ratio_states_value):
Output:
df(dataframe)
'''
+percent_b_states_values, close_sma_ratio_states_value = get_states(df)
+
#df['norm_bb_width_state'] = df['norm_bb_width'].apply(lambda x : value_to_state(x, bb_states_value)) #2
df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(lambda x : value_to_state(x, close_sma_ratio_states_value))
df['percent_b_state'] = df['percent_b'].apply(lambda x : value_to_state(x, percent_b_states_values))
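Both hunks above funnel through value_to_state: each dictionary maps a state index to an upper quantile boundary, with float("inf") closing the top bucket. A minimal sketch of the assumed mapping follows (not the repository's code; the boundary values are made up for illustration):

```python
# Sketch of the assumed behavior of value_to_state: return the index of the
# first bucket whose upper boundary the value does not exceed, as a string,
# since state indices are later concatenated into Q-table keys.
def value_to_state(value, states_values):
    for state, boundary in states_values.items():
        if value <= boundary:
            return str(state)
    return str(len(states_values) - 1)  # unreachable while the top boundary is inf

# Hypothetical cash boundaries, shaped like the dict built above:
cash_states_values = {0: 1e3, 1: 5e3, 2: 1e4, 3: 5e4, 9: float("inf")}
print(value_to_state(3200.0, cash_states_values))  # -> '1'
print(value_to_state(9e9, cash_states_values))     # -> '9'
```

Note that the get_states(df) call added to create_state_df recomputes the quantile dictionaries locally, shadowing the ones passed in as arguments.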
51 changes: 25 additions & 26 deletions src/sim.py
@@ -23,14 +23,12 @@
start_date = dt.datetime(2007, 1, 1)
end_date = dt.datetime(2016, 12, 31)

-q, bb_states_value, SMA_ratio_quantiles, MRDR_values = tu.trainqlearner(start_date, end_date, ticker)
+print("START")
+q, bb_states_value, SMA_ratio_quantiles, cash_quantiles, holdings_quantiles = tu.trainqlearner(start_date, end_date, ticker)
+print("END")
q.columns = ['HOLD', 'BUY', 'SELL']
bb_ = list(bb_states_value.values())
-
-print(bb_)
-
sma_ = list(SMA_ratio_quantiles.values())
-mrdr_ = list(MRDR_values.values())

# Fixing the range problem

@@ -500,7 +498,7 @@ def ols(stock_table,money,inc, original_shares,commission):
return results

# def qlearner(stock_table,money,inc, original_shares,qtable=ql[0], BB_quantiles=ql[1], SMA_quantiles=ql[2],window=window):
-def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, MRDR_quantiles=mrdr_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
+def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
'''
Enacts qlearning
@@ -592,16 +590,17 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
smq = len(SMA_quantiles) - 1

# find current SMA value
-mra = d.get_mrdr(stock_table.iloc[:i],baseline).iloc[-1]
+#mra = d.get_mrdr(stock_table.iloc[:i],baseline).iloc[-1]

# find current SMA quantile
-if mra != float('inf'):
-mrq = np.argwhere(np.where(MRDR_quantiles>mra,1,0))[0][0]
-else:
-mrq = len(MRDR_quantiles) - 1
+# if mra != float('inf'):
+# mrq = np.argwhere(np.where(MRDR_quantiles>mra,1,0))[0][0]
+# else:
+# mrq = len(MRDR_quantiles) - 1

# find state based on these two pieces of information
-state = str(smq) + str(bbq) + str(mrq)
+#state = str(smq) + str(bbq) + str(mrq)
+state = str(smq) + str(bbq)

# locate *optimal* action from Q table, which we will then examine to see if it's possible
# print("STATE: ", state, str(bbq), str(smq))
@@ -663,7 +662,7 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu

actions = pd.Series(actions,index=stock_table.index)

-results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':qtable, 'state_history':pd.Series(state_history),'BB_quantiles':BB_quantiles,'SMA_quantiles':SMA_quantiles,'MRDR_quantiles':MRDR_quantiles, 'markov':markov}
+results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':qtable, 'state_history':pd.Series(state_history),'BB_quantiles':BB_quantiles,'SMA_quantiles':SMA_quantiles, 'markov':markov}
return results

# function to return stats and graphs
@@ -757,19 +756,19 @@ def return_stats(stock='aapl',

# marginalize over MRDR
# TODO - determine if this mean was taken correctly
-qtab_mrdr = weighted_average_and_normalize(qtab, state_history, 2, quantile_length)
-qtab_mrdr = qtab_mrdr.iloc[::-1]
-qtab_mrdr.index = np.round(np.flip(np.array(results[policy.__name__]['MRDR_quantiles'])),5)
+# qtab_mrdr = weighted_average_and_normalize(qtab, state_history, 2, quantile_length)
+# qtab_mrdr = qtab_mrdr.iloc[::-1]
+# qtab_mrdr.index = np.round(np.flip(np.array(results[policy.__name__]['MRDR_quantiles'])),5)

-plt.figure(figsize=(9,7))
-fig = heatmap(qtab_mrdr,cmap='Blues')
-plt.title('Market Relative Daily Return Q-Table',size=16)
-plt.gca().hlines([i+1 for i in range(len(qtab_mrdr.index))],xmin=0,xmax=10,linewidth=10,color='white')
-plt.xticks(fontsize=15)
-plt.yticks(fontsize=14,rotation=0)
-plt.gca().tick_params(axis='x',bottom=False,left=False)
-plt.gca().tick_params(axis='y',bottom=False,left=False)
-plt.show(fig)
+# plt.figure(figsize=(9,7))
+# fig = heatmap(qtab_mrdr,cmap='Blues')
+# plt.title('Market Relative Daily Return Q-Table',size=16)
+# plt.gca().hlines([i+1 for i in range(len(qtab_mrdr.index))],xmin=0,xmax=10,linewidth=10,color='white')
+# plt.xticks(fontsize=15)
+# plt.yticks(fontsize=14,rotation=0)
+# plt.gca().tick_params(axis='x',bottom=False,left=False)
+# plt.gca().tick_params(axis='y',bottom=False,left=False)
+# plt.show(fig)


# get markov transition models
@@ -829,7 +828,7 @@ def return_stats(stock='aapl',
try:
del dic['BB_quantiles']
del dic['SMA_quantiles']
-del dic['MRDR_quantiles']
+# del dic['MRDR_quantiles']
except:
pass
df = pd.DataFrame(dic)
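The net effect of the sim.py changes: MRDR is dropped from the state, so qlearner now builds a two-digit key (SMA-ratio bucket plus %B bucket), while the retrained Q table, as the trainqlearner_util.py diff below shows, appends cash and holdings buckets to form four-digit keys. That mismatch is the "2 states vs 4 states" ingestion error named in the commit message. A hedged sketch of the lookup and key construction (assumed behavior, with made-up boundary values):

```python
import numpy as np

# Mirrors the np.argwhere/np.where idiom in the hunk above: index of the
# first quantile boundary strictly greater than the value, else the top bucket.
def quantile_index(value, quantiles):
    hits = np.argwhere(np.where(np.array(quantiles) > value, 1, 0))
    return hits[0][0] if len(hits) else len(quantiles) - 1

sma_ = [0.98, 1.00, 1.05, float("inf")]  # illustrative boundaries only
bb_ = [0.20, 0.50, 0.80, float("inf")]

smq = quantile_index(1.02, sma_)  # -> 2
bbq = quantile_index(0.70, bb_)   # -> 2
state = str(smq) + str(bbq)       # '22': a two-digit key...

# ...but a Q table trained on indicator buckets plus cash and holdings
# buckets is indexed by four-digit keys such as '2203', so this two-digit
# lookup cannot find a matching row.
```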
44 changes: 32 additions & 12 deletions src/trainqlearner_util.py
@@ -166,11 +166,18 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
actions_history(dict): has everydays' actions and close price
returns_since_entry(list): contains every day's return since entry
'''
# create framework for episode-to-episode Q table change tracking; will track MSE between episodes
episode = 0
q_cur = q.copy()
errs = []
episode_decile = episodes//10
+
+# actions_history = []
+# num_shares = 0
+# bought_history = []
+# returns_since_entry = [0]
+# cash = 100000
+cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles()
alpha = 0.1
for ii in range(episodes):
actions_history = []
@@ -186,13 +193,11 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
next_adj_close, next_state = train_data[i+1]
except:
break
-
-
-current_cash_state = value_to_state(cash, cash_states_values)
-current_share_state = value_to_state(num_shares, shares_states_values)
+
+current_cash_state = d.value_to_state(cash, cash_states_values)
+current_share_state = d.value_to_state(num_shares, shares_states_values)
state = state + current_cash_state + current_share_state
-

if i >=1:
epsilon*= 0.9958

@@ -230,8 +235,8 @@ def train_q_learning(train_data, q, gamma, episodes,sh):

#next_cash_state = value_to_state(next_cash,cash_states_values)
## Use 'cash' instead as affect 'current'
-next_cash_state = value_to_state(cash,cash_states_values)
-next_share_state = value_to_state(num_shares, shares_states_values)
+next_cash_state = d.value_to_state(cash,cash_states_values)
+next_share_state = d.value_to_state(num_shares, shares_states_values)
## Note: cash and num_share are automatically updated in at the end of the Action code block
next_state = next_state + next_cash_state + next_share_state

@@ -240,11 +245,13 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
# update q table
q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max()))

print("ARRIVED AT PORTFOLIO VAL")
current_portfolio_value.append(cash + num_shares*next_adj_close)

# ---- (tentative) start of q-table info plotting/output -----
-q_last_1 = q_cur_1.copy()
q_cur_1 = q.copy()
+q_last_1 = q_cur_1.copy()
+

# add convergence tracking for episode 1
if episode == 1:
@@ -262,8 +269,9 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
plt.show()

# calculate MSE between epsiodes
-q_last = q_cur.copy()
q_cur = q.copy()
+q_last = q_cur.copy()
+

# update MSE tracking
MSE = np.sum(np.square(q_cur - q_last).values)
@@ -480,15 +488,18 @@ def train_q_learning(train_data, q, gamma, episodes,sh):
# return q, actions_history, returns_since_entry

#def trainqlearner(start_date, end_date, ticker,alpha=0.01, epsilon=0.2, epsilon_decay = .99995, gamma=0.95, episodes=500,commission=0,sell_penalty=0):
-def trainqlearner(train_data, q, gamma=0.95, episodes=200, sh = 50)
+#def trainqlearner(train_data,start_date, end_date, ticker, q, gamma=0.95, episodes=200, sh = 50):
+def trainqlearner(start_date, end_date, ticker, gamma=0.95, episodes=200, sh = 50):
# Split the data into train and test data set
train_df = d.get_stock_data(ticker, start_date, end_date)

# Action Definition (= Q table columns)
all_actions = {0: 'hold', 1: 'buy', 2: 'sell'}

# create_df = normalized predictors norm_bb_width, norm_adj_close, norm_close_sma_ratio
print("START create_df")
train_df = d.create_df(train_df, 3)
print("END create_df")

# get_states = States Dictionary after discretizing by converting continuous values to integer state
percent_b_states_values, close_sma_ratio_states_value = d.get_states(
@@ -499,17 +510,26 @@ def trainqlearner(train_data, q, gamma=0.95, episodes=200, sh = 50)
train_df, percent_b_states_values, close_sma_ratio_states_value)
#train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value)

+# New
+cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles()
+
# Return a list of strings representing the combination of all the states
-all_states = get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
+all_states = d.get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
+# all_states = d.get_all_states(None, percent_b_states_values, close_sma_ratio_states_value)
states_size = len(all_states)

# Preparation of the Q Table
print("START q_init")
q_init = initialize_q_mat(all_states, all_actions)/1e9
print("END q_init")
print(q_init)

train_data = np.array(train_df[['norm_adj_close', 'state']])

-q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, gamma=0.95, episodes=200, sh = 50)
+print("START train_q_learning")
+q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q_init, gamma=0.95, episodes=1, sh = 50)
+print("END train_q_learning")

# Specify quantiles
BB_quantiles = percent_b_states_values
SMA_ratio_quantiles = close_sma_ratio_states_value
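The update applied in train_q_learning is the standard tabular Q-learning rule, Q(s,a) <- (1 - alpha)*Q(s,a) + alpha*(r + gamma*max_a' Q(s',a')), with between-episode convergence tracked as the summed squared difference of successive Q tables. A small self-contained sketch of both pieces (illustrative states and reward, not the repository's data):

```python
import numpy as np
import pandas as pd

alpha, gamma = 0.1, 0.95  # alpha matches the value set in the first hunk

# Tiny illustrative Q table: four-digit state keys, three actions.
q = pd.DataFrame(0.0, index=["0000", "0103"], columns=["hold", "buy", "sell"])

# One update of Q(s,a) <- (1 - alpha)*Q(s,a) + alpha*(r + gamma*max Q(s',:)),
# the same expression as the q.loc[state, action] line in the diff.
state, action, reward, next_state = "0000", "buy", 1.5, "0103"
q.loc[state, action] = (1. - alpha) * q.loc[state, action] + \
    alpha * (reward + gamma * q.loc[next_state].max())
# q.loc["0000", "buy"] is now 0.1 * (1.5 + 0.95 * 0.0) = 0.15

# Episode-to-episode convergence check, as in the diff: snapshot the table,
# run another episode of updates, then compare.
q_last = q.copy()
# ... one more episode of updates would modify q here ...
MSE = np.sum(np.square(q - q_last).values)  # 0.0 with no further updates
```

With episodes=1 hard-coded in the train_q_learning call above, this MSE tracking has nothing to compare across episodes; presumably a temporary setting while debugging the state-ingestion error.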