From e19b89ed518c727d75287d5dc71831fb3d4aa034 Mon Sep 17 00:00:00 2001 From: Mariem Ayadi Date: Mon, 7 Dec 2020 00:02:57 -0500 Subject: [PATCH] #55 Full integration post debugging and sync with test section --- src/data_process.py | 95 +++++---- src/sim.py | 405 +++++++++++++++++++------------------- src/trainqlearner_util.py | 320 +++++------------------------- 3 files changed, 307 insertions(+), 513 deletions(-) diff --git a/src/data_process.py b/src/data_process.py index 583f150..0a9f31f 100644 --- a/src/data_process.py +++ b/src/data_process.py @@ -111,23 +111,6 @@ def get_stock_data(symbol, start, end): return df -def get_bollinger_bands(values, window): - ''' - Return upper and lower Bollinger Bands. - INPUTS: - values(pandas series) - window(int): time period to consider - OUTPUS: - band_width(pandas series) - ''' - # rolling mean - rm = values.rolling(window=window).mean() - rstd = values.rolling(window=window).std() - - band_width = rm / rstd - return band_width.apply(lambda x: round(x, 5)) - - def get_adj_close_sma_ratio(values, window): ''' Return the ratio of adjusted closing value to the simple moving average. @@ -142,7 +125,7 @@ def get_adj_close_sma_ratio(values, window): return ratio.apply(lambda x: round(x, 5)) -def discretize(values, num_states=4): +def discretize(values, num_states=9): ''' Convert continuous values to integer state Inputs: @@ -200,7 +183,7 @@ def value_to_state(value, states_value): return 'value out of range' -def create_df(df, window=45): +def create_df(df, window=5): ''' Create a dataframe with the normalized predictors norm_bb_width, norm_adj_close, norm_close_sma_ratio @@ -260,7 +243,7 @@ def get_states(df): return percent_b_states_values, close_sma_ratio_states_value -def create_state_df(df, bb_states_value, close_sma_ratio_states_value): +def create_state_df(df, percent_b_states_values, close_sma_ratio_states_value): ''' Add a new column to hold the state information to the dataframe Inputs: @@ -271,8 +254,6 @@ def create_state_df(df, bb_states_value, close_sma_ratio_states_value): Output: df(dataframe) ''' - percent_b_states_values, close_sma_ratio_states_value = get_states(df) - #df['norm_bb_width_state'] = df['norm_bb_width'].apply(lambda x : value_to_state(x, bb_states_value)) #2 df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(lambda x : value_to_state(x, close_sma_ratio_states_value)) df['percent_b_state'] = df['percent_b'].apply(lambda x : value_to_state(x, percent_b_states_values)) @@ -297,17 +278,47 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_s states = [] for c, _ in close_sma_ratio_states_value.items(): for b, _ in percent_b_states_values.items(): - for m, _ in cash_states_values.items(): - for s, _ in shares_states_values.items(): - state = str(c) + str(b) + str(m) + str(s) - states.append(str(state)) - + for m, _ in cash_states_values.items(): + for s, _ in shares_states_values.items(): + state = str(c) + str(b) + str(m) + str(s) + states.append(str(state)) + return states +# def weighted_average_and_normalize(qtable,state_history,state_num,quantile_length): +# ''' +# takes a q table and does a weighted average group by given the input state_number (what digit number it is in the state) +# +# Inputs: +# qtable: the qtable (DataFrame) +# state_history: the state history (Series) +# state_num: the number digit that indicates the state +# quantile_length: the number of quantiles we built this out with +# ''' +# qtab_2 = pd.merge(qtable,pd.Series(state_history,name='state_history'),'inner',left_index=True,right_index=True) +# +# sh = qtab_2['state_history'] +# qtab_2 = qtab_2.drop(columns=['state_history']).multiply(qtab_2['state_history'],axis=0) +# +# qtab_2 = pd.merge(qtab_2,sh,'inner',left_index=True,right_index=True) +# +# qtab_2['state'] = qtab_2.index.str.slice(state_num,state_num+1) +# +# qtab_3 = qtab_2.groupby('state').sum() +# +# qtab_4 = qtab_3.divide(qtab_3['state_history'],axis=0).drop(columns='state_history') +# +# qtab_5 = qtab_4.reindex([str(i) for i in range(quantile_length)]) +# +# #normalize by max +# qtab_6 = qtab_5.divide(qtab_5.max(axis=1),axis=0) +# +# return qtab_6 + def weighted_average_and_normalize(qtable,state_history,state_num,quantile_length): ''' takes a q table and does a weighted average group by given the input state_number (what digit number it is in the state) - + Inputs: qtable: the qtable (DataFrame) state_history: the state history (Series) @@ -315,24 +326,26 @@ def weighted_average_and_normalize(qtable,state_history,state_num,quantile_lengt quantile_length: the number of quantiles we built this out with ''' qtab_2 = pd.merge(qtable,pd.Series(state_history,name='state_history'),'inner',left_index=True,right_index=True) - + + # reverse normalization: qtab_2['state_history'] = 1/qtab_2['state_history'] + sh = qtab_2['state_history'] qtab_2 = qtab_2.drop(columns=['state_history']).multiply(qtab_2['state_history'],axis=0) - + qtab_2 = pd.merge(qtab_2,sh,'inner',left_index=True,right_index=True) - + qtab_2['state'] = qtab_2.index.str.slice(state_num,state_num+1) - - qtab_3 = qtab_2.groupby('state').sum() - - qtab_4 = qtab_3.divide(qtab_3['state_history'],axis=0).drop(columns='state_history') - + + + qtab_3 = qtab_2.groupby('state').sum().drop(columns='state_history') + + + qtab_4 = qtab_3.divide(qtab_3.abs().sum(axis=1),axis=0) + + qtab_5 = qtab_4.reindex([str(i) for i in range(quantile_length)]) - - #normalize by max - qtab_6 = qtab_5.divide(qtab_5.max(axis=1),axis=0) - - return qtab_6 + + return qtab_5 diff --git a/src/sim.py b/src/sim.py index 96f9289..0bfcd42 100644 --- a/src/sim.py +++ b/src/sim.py @@ -23,28 +23,27 @@ start_date = dt.datetime(2007, 1, 1) end_date = dt.datetime(2016, 12, 31) -print("START") -q, bb_states_value, SMA_ratio_quantiles, cash_quantiles, holdings_quantiles = tu.trainqlearner(start_date, end_date, ticker) -print("END") -q.columns = ['HOLD', 'BUY', 'SELL'] -bb_ = list(bb_states_value.values()) -sma_ = list(SMA_ratio_quantiles.values()) -# Fixing the range problem +q, train_actions_history, train_returns_since_entry, percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values = tu.trainqlearner(ticker, start_date, end_date, window = 5, gamma = 0.95, episodes = 100, sh = 20) -# q.iloc[0] = q.iloc[0] * 1e-16 -#nq = (q - q.mean()) / q.std() +q.columns = ['HOLD', 'BUY', 'SELL'] nq=q nq.columns = ['HOLD', 'BUY', 'SELL'] action_list = ['BUY','HOLD','SELL'] nq = nq[action_list] -# nq = nq.div(nq.abs().max(axis=1), axis=0) forgoing normalization +test_df = d.get_stock_data(ticker, start, end) +test_df = d.create_df(test_df, 5) +test_df = d.create_state_df(test_df, percent_b_states_values , close_sma_ratio_states_value) +temp = test_df.iloc[:-1, :-1] +test_df = np.array(test_df[['Adj Close', 'state']]) + + + -# function to hold every day -def hold(stock_table,money,inc,original_shares,commission,): +def hold(stock_table,money,inc,original_shares,commission): ''' Enacts hold-every-day strategy @@ -68,6 +67,7 @@ def hold(stock_table,money,inc,original_shares,commission,): # calculate daily returns ret = returns(stock_table) + original_shares = original_shares + inc # dummy calculations to reset to initialize return calculations # what this does is just sets the first entry of the returns Series to total value of stock held originally @@ -89,10 +89,10 @@ def hold(stock_table,money,inc,original_shares,commission,): # add original cash to this final_vals += money - + # create markov transition matrix markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list) - + markov.loc['HOLD','HOLD']=1 results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov,'state_history': None} @@ -134,7 +134,7 @@ def random_action(stock_table,money,inc,original_shares,commission): # create shares table shares = stock_table.copy() shares.iloc[0] = original_shares - + # create markov transition matrix markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list) @@ -182,12 +182,12 @@ def random_action(stock_table,money,inc,original_shares,commission): shares.iloc[i] = shares.values[j] - inc actions += [act] - + # increment markov markov.loc[actions[j],actions[i]] +=1 actions = pd.Series(actions,index=stock_table.index) - + # normalize markov markov = markov.divide(markov.sum(axis=1),axis=0).round(2) @@ -234,7 +234,7 @@ def rule_based(stock_table,money,inc, original_shares,commission): # create cash table cash = stock_table.copy() cash.iloc[0] = money - + # create markov transition matrix markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list) @@ -286,15 +286,15 @@ def rule_based(stock_table,money,inc, original_shares,commission): shares.iloc[i] = shares.values[j] - inc actions += [act] - + # increment markov markov.loc[actions[j],actions[i]] +=1 actions = pd.Series(actions,index=stock_table.index) - + # normalize markov markov = markov.divide(markov.sum(axis=1),axis=0).round(2) - + results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov, 'state_history': None} return results @@ -334,7 +334,7 @@ def buy_always(stock_table,money,inc,original_shares,commission): # create shares table shares = stock_table.copy() shares.iloc[0] = original_shares - + # create markov transition matrix markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list) @@ -357,8 +357,6 @@ def buy_always(stock_table,money,inc,original_shares,commission): # if you can't buy, hold if cur_cash < (cur_price*inc): act = 'HOLD' - - # else buy else: act = 'BUY' @@ -369,14 +367,14 @@ def buy_always(stock_table,money,inc,original_shares,commission): if act == 'BUY': cash.iloc[i] = cash.values[j] - (inc*cur_price) - commission shares.iloc[i] = shares.values[j] + inc - + actions += [act] - + # increment markov markov.loc[actions[j],actions[i]] +=1 actions = pd.Series(actions,index=stock_table.index) - + # normalize markov markov = markov.divide(markov.sum(axis=1),axis=0).round(2) @@ -429,13 +427,13 @@ def ols(stock_table,money,inc, original_shares,commission): # calculate daily portfolio value final_vals = stock_table.copy() final_vals.iloc[0] = original_val - + # create markov transition matrix markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list) # iterate through days for i in range(1,stock_table.shape[0]): - + j = i-1 # last day cur_cash = cash.values[j] # current cash cur_shares = shares.values[j] # current shares @@ -488,185 +486,124 @@ def ols(stock_table,money,inc, original_shares,commission): # increment markov markov.loc[actions[j],actions[i]] +=1 - + actions = pd.Series(actions,index=stock_table.index) - + # normalize markov markov = markov.divide(markov.sum(axis=1),axis=0).round(2) - + results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov,'state_history': None} return results # def qlearner(stock_table,money,inc, original_shares,qtable=ql[0], BB_quantiles=ql[1], SMA_quantiles=ql[2],window=window): -def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, window=5): # defining defaults here prevents need for args to be passed in return_stats function - ''' - Enacts qlearning - - Inputs - stock_table: list of daily stock or portfolio values - money: original cash held - inc: increment of buy/sell permitted - original_shares: original number of shares held - qtable: input qtable (Pandas dataframe, columns are "BUY SELL HOLD", rows are states) - BB_quantiles: quantiles of Bollinger bands - SMA_quantiles: quantiles of SMA - window: lookback window - - Output - results: dictionary holding... - *one Pandas series each (key/Series names are identical) for* - final_vals: final daily values of portfolio - actions: daily actions taken ("BUY" "SELL" "HOLD") - shares: daily number of shares of stock held - cash: daily amount of cash held - *additionally* - qtable: pandas dataframe formatted the same as the input dataframe (will be identical) +def qlearner(stock_table,money,inc, original_shares, commission, q_table = nq, test_data = test_df, percent_b_states_values = percent_b_states_values, close_sma_ratio_states_value = close_sma_ratio_states_value, cash_states_values = cash_states_values, shares_states_values = shares_states_values, temp = temp): ''' - - # record original value - print(stock_table[1]) - - original_val = money + (stock_table.values[0]*original_shares) # initial cash - - # generate table of returns - ret = returns(stock_table) - - # create actions table - actions = ['HOLD'] - - # create shares table - shares = stock_table.copy() - shares.iloc[0] = original_shares - - # create cash table - cash = stock_table.copy() - cash.iloc[0] = money - - # calculate daily portfolio value - final_vals = stock_table.copy() - final_vals.iloc[0] = original_val - + Evaluate the Q-table + Inputs: + test_data(dataframe) + q(dataframe): trained Q-table + Output: + actions_history(dict): has everydays' actions and close price + returns_since_entry(list): contains every day's return since entry + ''' + current_portfolio_value = [] + cash = money + num_shares = original_shares + curr_cash = [] + curr_shares = [] + curr_cash_s = [] + curr_shares_s = [] + act_list = [] + cash_list = [] + shares_list = [] + final_states = [] state_history = {} + actions_history =[] + for i, val in enumerate(test_data): + current_adj_close, state = val + try: + next_adj_close, next_state = test_data[i + 1] + except: + print('End of data! Done!') + break - # create markov transition matrix - markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list) - - # define baseline for mrdr - baseline = read_stock('^GSPC',start,end) - - # iterate through days - for i in range(1,stock_table.shape[0]): - - j = i-1 # last day - cur_cash = cash.values[j] # current cash - cur_shares = shares.values[j] # current shares - final_vals.iloc[i] = cur_cash + (cur_shares*stock_table.values[i]) # end of day portfolio value - cur_price = stock_table.values[j] + current_cash_state = d.value_to_state(cash, cash_states_values) + current_share_state = d.value_to_state(num_shares, shares_states_values) + state = state + current_cash_state + current_share_state - if i > window: # if we have enough of a lookback window to calculate stats + final_states.append(state) + curr_cash.append(cash) + curr_shares.append(num_shares) + curr_cash_s.append(current_cash_state) + curr_shares_s.append(current_share_state) - # find yesterday's final bollinger band value - upper, lower = d.get_upper_lower_bands(stock_table.iloc[:i], window) - bb = ((stock_table.iloc[:i] - lower) * 100 / (upper - lower)).iloc[j] + try: + state_history[state] += 1 + except KeyError: + state_history[state] = 1 + action = tu.act(state, q_table, threshold=0, actions_size=3) - #bb = d.get_bollinger_bands(stock_table.iloc[:i],window).iloc[j] - # find yesterday's final bollinger band quantile - if bb != float('inf'): - bbq = np.argwhere(np.where(BB_quantiles>bb,1,0))[0][0] + if action == 0: # buy + if cash > inc * current_adj_close: + next_cash = cash - inc * current_adj_close + num_shares += inc + cash = next_cash else: - bbq = len(BB_quantiles) - 1 - + action = 1 - # find current SMA value - sma = d.get_adj_close_sma_ratio(stock_table.iloc[:i],window).iloc[j] - - # find current SMA quantile - if sma != float('inf'): - smq = np.argwhere(np.where(SMA_quantiles>sma,1,0))[0][0] - else: - smq = len(SMA_quantiles) - 1 - - # find current SMA value - #mra = d.get_mrdr(stock_table.iloc[:i],baseline).iloc[-1] - - # find current SMA quantile - # if mra != float('inf'): - # mrq = np.argwhere(np.where(MRDR_quantiles>mra,1,0))[0][0] - # else: - # mrq = len(MRDR_quantiles) - 1 - - # find state based on these two pieces of information - #state = str(smq) + str(bbq) + str(mrq) - state = str(smq) + str(bbq) - - # locate *optimal* action from Q table, which we will then examine to see if it's possible -# print("STATE: ", state, str(bbq), str(smq)) -# print(qtable.loc[state]) - cur_act = qtable.loc[state].idxmax() - - #maintain a score of state visited - ''' - if state_history.get(state, None) is None: - state_history[state] = 0 + if action == 2: # sell + if num_shares > 0: + next_cash = cash + inc * current_adj_close + num_shares -= inc + cash = next_cash else: - state_history[state]+=1 - ''' + action = 1 - try: - state_history[state] += 1 - except KeyError: - state_history[state] = 1 + if action == 0: + act_list.append('BUY') + elif action == 2: + act_list.append('SELL') + else: + act_list.append('HOLD') + actions_history.append((i, current_adj_close, action)) - else: # if we're too early to have a full lookback window - cur_act = 'HOLD' + cash_list.append(cash) + shares_list.append(num_shares) + current_portfolio_value.append(cash + num_shares * next_adj_close) - # if you can't buy or sell, hold - if cur_shares < inc and cur_cash < (cur_price*inc): - act = 'HOLD' - # if you can't sell, but you can buy... buy if it makes sense, or hold if it doesn't - elif cur_shares < inc: - act = 'BUY' if cur_act == 'BUY' else 'HOLD' - # if you can't buy, but you can sell... sell if it makes sense, or hold if it doesn't - elif cur_cash < (cur_price*inc): - act = 'SELL' if cur_act == 'SELL' else 'HOLD' + markov = pd.DataFrame(np.zeros((3, 3)), index=action_list, columns=action_list) + for i in range(1,len(act_list)): + markov.loc[act_list[i-1],act_list[i]] +=1 - # otherwise do whatever makes sense - else: - act = cur_act + temp['cash'] = curr_cash + temp['cash_state'] = curr_cash_s + temp['shares'] = curr_shares + temp['shares_state'] = curr_shares_s + temp['state'] = final_states + temp.to_csv('./data/viz_data.csv') - # take action - if act == 'HOLD': - cash.iloc[i] = cash.values[j] - shares.iloc[i] = shares.values[j] - if act == 'BUY': - cash.iloc[i] = cash.values[j] - (inc*cur_price) - commission - shares.iloc[i] = shares.values[j] + inc - if act == 'SELL': - cash.iloc[i] = cash.values[j] + (inc*cur_price) - commission - shares.iloc[i] = shares.values[j] - inc - - actions += [act] - # increment markov - markov.loc[actions[j],actions[i]] +=1 - - - # normalize markov - markov = markov.divide(markov.sum(axis=1),axis=0).round(2) - - actions = pd.Series(actions,index=stock_table.index) + actions = pd.Series(act_list, index=stock_table.index) + f_shares = pd.Series(shares_list, index=stock_table.index) + f_cash = pd.Series(cash_list, index=stock_table.index) + final_vals = pd.Series(current_portfolio_value, index=stock_table.index) - results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':qtable, 'state_history':pd.Series(state_history),'BB_quantiles':BB_quantiles,'SMA_quantiles':SMA_quantiles, 'markov':markov} + results = {'final_vals': final_vals, 'actions': actions, 'shares': f_shares, 'cash': f_cash, 'qtable': q_table, + 'state_history': pd.Series(state_history), 'BB_quantiles': list(percent_b_states_values.values()), + 'SMA_quantiles': list(close_sma_ratio_states_value.values()), + 'CASH_quantiles': list(cash_states_values.values()), 'SHARE_quantiles': list(shares_states_values.values()), + 'markov': markov, 'actions_history' : actions_history} return results + # function to return stats and graphs -def return_stats(stock='aapl', +def return_stats(stock='jpm', commission = 2, money=100000, #inc=10,- can read this argument and change code below if doing absolute share-based @@ -687,24 +624,29 @@ def return_stats(stock='aapl', Provides numerous summary statistics and visualizations ''' - + original_money = money # generate stock table stock_table = read_stock(stock,start,end) - + # note stock name stock_name = stock.upper() # approximate 50/50 split in money-stock - original_shares = round(money / 2 / stock_table.values[0]) + original_shares = 0 # recalculate money accordingly - money -= (stock_table.values[0]*original_shares) + + money = original_money # make share increment about 1% of original share holdings - inc = m.ceil(original_shares / 100) + inc = 20 + + stock_table = stock_table[4:] + + # generate results results = {policy.__name__:policy(stock_table, @@ -713,6 +655,40 @@ def return_stats(stock='aapl', original_shares = original_shares, commission = commission) for policy in policies} + actions_history = results['qlearner']['actions_history'] + + days, prices, actions = [], [], [] + for d, p, a in actions_history: + days.append(d) + prices.append(p) + actions.append(a) + hold_d, hold_p, buy_d, buy_p, sell_d, sell_p = [], [], [], [], [], [] + for d, p, a in actions_history: + if a == 0: + hold_d.append(d) + hold_p.append(p) + if a == 1: + buy_d.append(d) + buy_p.append(p) + if a == 2: + sell_d.append(d) + sell_p.append(p) + + buys = pd.DataFrame(list(zip(hold_d, hold_p)), columns =['Date', 'Adj Close']) + sells = pd.DataFrame(list(zip(buy_d, buy_p)), columns =['Date', 'Adj Close']) + holds = pd.DataFrame(list(zip(sell_d, buy_p)), columns=['Date', 'Adj Close']) + + buys.to_csv('./data/buy_data.csv') + sells.to_csv('./data/sell_data.csv') + holds.to_csv('./data/hold_data.csv') + + + + + + + + # plot qtables only for qlearner (or any other strategies with Q table) for policy in policies: if results[policy.__name__]['qtable'] is not None: #don't try to plot Q tables for benchmark strategies @@ -721,7 +697,7 @@ def return_stats(stock='aapl', state_history = results[policy.__name__]['state_history'] quantile_length = len(results[policy.__name__]['BB_quantiles']) qtab = results[policy.__name__]['qtable'] - + qtab_bb = weighted_average_and_normalize(qtab, state_history, 1, quantile_length) qtab_bb = qtab_bb.iloc[::-1] # reverse order of rows for visualization purposes - now biggest value will be on top qtab_bb.index = np.round(np.flip(np.array(results[policy.__name__]['BB_quantiles'])),5) # define index as bb quantiles, reversing quantile order in kind so biggest value is first @@ -740,9 +716,10 @@ def return_stats(stock='aapl', # marginalize over SMA # TODO - determine if this mean was taken correctly + quantile_length = len(results[policy.__name__]['SMA_quantiles']) qtab_sma = weighted_average_and_normalize(qtab, state_history, 0, quantile_length) qtab_sma = qtab_sma.iloc[::-1] - qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SMA_quantiles'])),5) + qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SMA_quantiles'])),10) plt.figure(figsize=(9,7)) fig = heatmap(qtab_sma,cmap='Blues') @@ -753,23 +730,40 @@ def return_stats(stock='aapl', plt.gca().tick_params(axis='x',bottom=False,left=False) plt.gca().tick_params(axis='y',bottom=False,left=False) plt.show(fig) - - # marginalize over MRDR - # TODO - determine if this mean was taken correctly - # qtab_mrdr = weighted_average_and_normalize(qtab, state_history, 2, quantile_length) - # qtab_mrdr = qtab_mrdr.iloc[::-1] - # qtab_mrdr.index = np.round(np.flip(np.array(results[policy.__name__]['MRDR_quantiles'])),5) - - # plt.figure(figsize=(9,7)) - # fig = heatmap(qtab_mrdr,cmap='Blues') - # plt.title('Market Relative Daily Return Q-Table',size=16) - # plt.gca().hlines([i+1 for i in range(len(qtab_mrdr.index))],xmin=0,xmax=10,linewidth=10,color='white') - # plt.xticks(fontsize=15) - # plt.yticks(fontsize=14,rotation=0) - # plt.gca().tick_params(axis='x',bottom=False,left=False) - # plt.gca().tick_params(axis='y',bottom=False,left=False) - # plt.show(fig) - + + #CASH + quantile_length = len(results[policy.__name__]['CASH_quantiles']) + qtab_sma = weighted_average_and_normalize(qtab, state_history, 2, quantile_length) + qtab_sma = qtab_sma.iloc[::-1] + qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['CASH_quantiles'])), 10) + + plt.figure(figsize=(9, 7)) + fig = heatmap(qtab_sma, cmap='Blues') + plt.title('CASH Q-Table', size=16) + plt.gca().hlines([i + 1 for i in range(len(qtab_sma.index))], xmin=0, xmax=10, linewidth=10, color='white') + plt.xticks(fontsize=15) + plt.yticks(fontsize=14, rotation=0) + plt.gca().tick_params(axis='x', bottom=False, left=False) + plt.gca().tick_params(axis='y', bottom=False, left=False) + plt.show(fig) + + #SHARES + quantile_length = len(results[policy.__name__]['SHARE_quantiles']) + qtab_sma = weighted_average_and_normalize(qtab, state_history, 3, quantile_length) + qtab_sma = qtab_sma.iloc[::-1] + qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SHARE_quantiles'])), 10) + + plt.figure(figsize=(9, 7)) + fig = heatmap(qtab_sma, cmap='Blues') + plt.title('SHARE Q-Table', size=16) + plt.gca().hlines([i + 1 for i in range(len(qtab_sma.index))], xmin=0, xmax=10, linewidth=10, color='white') + plt.xticks(fontsize=15) + plt.yticks(fontsize=14, rotation=0) + plt.gca().tick_params(axis='x', bottom=False, left=False) + plt.gca().tick_params(axis='y', bottom=False, left=False) + plt.show(fig) + + # get markov transition models for policy in policies: @@ -784,8 +778,8 @@ def return_stats(stock='aapl', plt.gca().tick_params(axis='y',bottom=False,left=False) plt.gca().hlines([1,2],xmin=0,xmax=10,linewidth=10,color='white') plt.show(fig) - - + + # plot daily portfolio values plt.figure(figsize=(14,8)) for policy in policies: @@ -828,7 +822,8 @@ def return_stats(stock='aapl', try: del dic['BB_quantiles'] del dic['SMA_quantiles'] - # del dic['MRDR_quantiles'] + del dic['CASH_quantiles'] + del dic['SHARE_quantiles'] except: pass df = pd.DataFrame(dic) @@ -877,7 +872,7 @@ def return_stats(stock='aapl', rets = {policy:returns(results[policy.__name__]['final_vals']) for policy in policies} # generate risk_free return for sharpe ratio - five-year treasury yield - rfs = returns(read_stock('^FVX')) + rfs = returns(read_stock('^FVX')[4:]) # find common indecies between stock tables and treasury yields rfn = set(stock_table.index).intersection(set(rfs.index)) @@ -887,7 +882,7 @@ def return_stats(stock='aapl', rfi = rfr.index # generate baseline return for information ratio - s&p 500 - bls = returns(read_stock('^GSPC')).values + bls = returns(read_stock('^GSPC')[4:]).values # print summary stats for daily returns for policy in policies: diff --git a/src/trainqlearner_util.py b/src/trainqlearner_util.py index aef5d4f..ec0ecf1 100644 --- a/src/trainqlearner_util.py +++ b/src/trainqlearner_util.py @@ -48,23 +48,6 @@ def act(state, q_mat, threshold, actions_size=3): return action -def get_return_since_entry(bought_history, current_adj_close): - ''' - Calculate the returns of current share holdings. - Inputs: - bought_history(list) - current_adj_close(float) - current_day(int) - Output: - return_since_entry(float) - ''' - return_since_entry = 0. - - for b in bought_history: - return_since_entry += (current_adj_close - b) - return return_since_entry - - # In[36]: @@ -108,52 +91,10 @@ def visualize_results(actions_history, returns_since_entry): ax2.scatter(sell_d, sell_p, color='red', label='sell') ax2.legend() -def get_invested_capital(actions_history, returns_since_entry): - ''' - Calculate the max capital being continously invested by the trader - Input: - actions_history(dict): has everydays' actions and close price - returns_since_entry(list): contains every day's return since entry - Output: - return_invest_ratio(float) - ''' - invest = [] - total = 0 - return_invest_ratio = None - for i in range(len(actions_history)): - a = actions_history[i][2] - p = actions_history[i][1] - - try: - next_a = actions_history[i+1][2] - except: - break - if a == 1: - total += p - if next_a != 1 or (i == len(actions_history)-2 and next_a == 1): - invest.append(total) - total = 0 - if invest: - return_invest_ratio = returns_since_entry[-1]/max(invest) - print('invested capital {}, return/invest ratio {}'.format(max(invest), - return_invest_ratio)) - else: - print('no buy transactions, invalid training') - return return_invest_ratio -def get_base_return(data): - ''' - Calculate the benchmark returns of a given stock - Input: - data(dataframe): containing normalized close price and state - Output: - return/invest ratio(float) - ''' - start_price, _ = data[0] - end_price, _ = data[-1] - return (end_price - start_price)/start_price + plt.show() -def train_q_learning(train_data, q, gamma, episodes,sh): +def train_q_learning(train_data, q, cash_states_values, shares_states_values, gamma, episodes, sh): ''' Train a Q-table Inputs: @@ -166,38 +107,53 @@ def train_q_learning(train_data, q, gamma, episodes,sh): actions_history(dict): has everydays' actions and close price returns_since_entry(list): contains every day's return since entry ''' - # create framework for episode-to-episode Q table change tracking; will track MSE between episodes - episode = 0 - q_cur = q.copy() - errs = [] - episode_decile = episodes//10 - # actions_history = [] # num_shares = 0 # bought_history = [] # returns_since_entry = [0] # cash = 100000 - cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles() + + episode = 0 + q_cur = q.copy() + errs = [] + episode_decile = episodes//10 + alpha = 0.1 for ii in range(episodes): + + episode += 1 + if episode == 1 or episode%episode_decile == 0 or episode == episodes: + print('Training episode {}'.format(episode)) + + actions_history = [] cash = 100000 num_shares = 0 if ii > 1: alpha = alpha*0.99995 epsilon = 0.8 - current_portfolio_value = [] + current_portfolio_value = [] + + #add convergence tracking for episode 1 + if episode == 1: + errs_1 = [] + q_cur_1 = q.copy() + + + for i, val in enumerate(train_data): current_adj_close, state = val try: next_adj_close, next_state = train_data[i+1] except: break - + + current_cash_state = d.value_to_state(cash, cash_states_values) current_share_state = d.value_to_state(num_shares, shares_states_values) state = state + current_cash_state + current_share_state + if i >=1: epsilon*= 0.9958 @@ -245,18 +201,16 @@ def train_q_learning(train_data, q, gamma, episodes,sh): # update q table q.loc[state, action] = (1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max())) - print("ARRIVED AT PORTFOLIO VAL") current_portfolio_value.append(cash + num_shares*next_adj_close) # ---- (tentative) start of q-table info plotting/output ----- - q_cur_1 = q.copy() q_last_1 = q_cur_1.copy() - - + q_cur_1 = q.copy() + # add convergence tracking for episode 1 if episode == 1: MSE_1 = np.sum(np.square(q_cur_1-q_last_1).values) - errs_1 += [MSE_1] + errs_1 += [MSE_1] # add convergence tracking for episode 1 if episode == 1: @@ -267,31 +221,30 @@ def train_q_learning(train_data, q, gamma, episodes,sh): x_axis = np.array([i+1 for i in range(len(errs_1))]) plt.plot(x_axis,errs_1) plt.show() - + # calculate MSE between epsiodes - q_cur = q.copy() q_last = q_cur.copy() - - + q_cur = q.copy() + # update MSE tracking MSE = np.sum(np.square(q_cur - q_last).values) - + # plot irregularities if episode > 1: if MSE > errs[-1]*3: print('Episode ' + str(episode) + ' showed irregularity. MSE was ' + str(MSE) + '. Showing big 10 biggest jumps in QTable below.') - + q_diff = (q_cur - q_last).copy() q_diff['colsum'] = q_diff.sum(axis=1) q_diff = q_diff.sort_values('colsum',ascending=False).iloc[:10] print(q_diff.drop(columns=['colsum'])) print('\n\n\n\n') - + errs += [MSE] - + print('End of Training!') - + # plot MSE plt.figure(figsize=(14,8)) plt.title('Q Table Stabilization By Episode',size=25) @@ -299,7 +252,7 @@ def train_q_learning(train_data, q, gamma, episodes,sh): plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14) x_axis = np.array([i+1 for i in range(len(errs))]) plt.plot(x_axis,errs) - + # plot MSE for episodes 1-10 if len(errs) >= 10: # plot MSE @@ -310,7 +263,7 @@ def train_q_learning(train_data, q, gamma, episodes,sh): plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14) x_axis = np.array([i+1 for i in range(len(errs_new))]) plt.plot(x_axis,errs_new) - + # plot MSE for episodes 11-end if possible if len(errs) >= 10: # plot MSE @@ -325,171 +278,10 @@ def train_q_learning(train_data, q, gamma, episodes,sh): return q, actions_history, current_portfolio_value -# def train_q_learning(train_data, q, alpha, epsilon, epsilon_decay, gamma, episodes,commission,sell_penalty): -# episode = 0 -# ''' -# Train a Q-table -# Inputs: -# train_data(dataframe) -# q(dataframe): initial Q-table -# epsilon(float): threshold of which action strategy to take -# alpha(float): proportion to weight future expected return vs. current return -# gamma(float): discount percentage on the future return -# commission(float): amount charged for stock transaction -# Output: -# q(dataframe): Updated Q-table -# actions_history(dict): has everydays' actions and close price -# returns_since_entry(list): contains every day's return since entry -# ''' -# # create framework for episode-to-episode Q table change tracking; will track MSE between episodes -# q_cur = q.copy() -# errs = [] -# episode_decile = episodes//10 - -# for ii in range(episodes): -# episode +=1 -# if episode == 1 or episode%episode_decile == 0 or episode == episodes: -# print('Training episode {}'.format(episode)) -# actions_history = [] -# num_shares = 0 -# bought_history = [] -# returns_since_entry = [0] -# days = [0] - -# # add convergence tracking for episode 1 -# if episode == 1: -# errs_1 = [] -# q_cur_1 = q.copy() - -# for i, val in enumerate(train_data): -# current_adj_close, state = val -# try: -# next_adj_close, next_state = train_data[i+1] -# except: -# break - -# if len(bought_history) > 0: -# returns_since_entry.append(get_return_since_entry( -# bought_history, current_adj_close)) -# else: -# returns_since_entry.append(returns_since_entry[-1]) - -# # decide action -# ''' -# if alpha > 0.1: -# alpha = alpha/(i+1) -# ''' -# epsilon*=epsilon_decay -# action = act(state, q, threshold=epsilon, actions_size=3) - -# # get reward -# if action == 0: # hold -# if num_shares > 0: -# prev_adj_close, _ = train_data[i-1] -# future = next_adj_close - current_adj_close -# past = current_adj_close - prev_adj_close -# reward = past -# else: -# reward = 0 - -# if action == 1: # buy -# reward = 0-commission -# num_shares += 1 -# bought_history.append((current_adj_close)) - -# if action == 2: # sell -# if num_shares > 0: -# bought_price = bought_history[0] -# reward = (current_adj_close - bought_price) - commission -# bought_history.pop(0) -# num_shares -= 1 - -# else: -# reward = 0 - sell_penalty -# actions_history.append((i, current_adj_close, action)) - -# # update q table -# q.loc[state, action] = ( -# 1.-alpha)*q.loc[state, action] + alpha*(reward+gamma*(q.loc[next_state].max())) - -# # ---- (tentative) start of q-table info plotting/output ----- -# q_last_1 = q_cur_1.copy() -# q_cur_1 = q.copy() - -# # add convergence tracking for episode 1 -# if episode == 1: -# MSE_1 = np.sum(np.square(q_cur_1-q_last_1).values) -# errs_1 += [MSE_1] - -# # add convergence tracking for episode 1 -# if episode == 1: -# plt.figure(figsize=(14,8)) -# plt.title('Q Table Stabilization Within Episode 1',size=25) -# plt.xlabel('Day Number',size=20) -# plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14) -# x_axis = np.array([i+1 for i in range(len(errs_1))]) -# plt.plot(x_axis,errs_1) -# plt.show() - -# # calculate MSE between epsiodes -# q_last = q_cur.copy() -# q_cur = q.copy() - -# # update MSE tracking -# MSE = np.sum(np.square(q_cur - q_last).values) - -# # plot irregularities -# if episode > 1: -# if MSE > errs[-1]*3: - -# print('Episode ' + str(episode) + ' showed irregularity. MSE was ' + str(MSE) + '. Showing big 10 biggest jumps in QTable below.') - -# q_diff = (q_cur - q_last).copy() -# q_diff['colsum'] = q_diff.sum(axis=1) -# q_diff = q_diff.sort_values('colsum',ascending=False).iloc[:10] -# print(q_diff.drop(columns=['colsum'])) -# print('\n\n\n\n') - -# errs += [MSE] - -# print('End of Training!') - -# # plot MSE -# plt.figure(figsize=(14,8)) -# plt.title('Q Table Stabilization By Episode',size=25) -# plt.xlabel('Episode Number',size=20) -# plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14) -# x_axis = np.array([i+1 for i in range(len(errs))]) -# plt.plot(x_axis,errs) - -# # plot MSE for episodes 1-10 -# if len(errs) >= 10: -# # plot MSE -# errs_new = errs[:10] -# plt.figure(figsize=(14,8)) -# plt.title('Q Table Stabilization By Episode (Episodes 1-10)',size=25) -# plt.xlabel('Episode Number',size=20) -# plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14) -# x_axis = np.array([i+1 for i in range(len(errs_new))]) -# plt.plot(x_axis,errs_new) - -# # plot MSE for episodes 11-end if possible -# if len(errs) >= 10: -# # plot MSE -# errs_new = errs[11:] -# plt.figure(figsize=(14,8)) -# plt.title('Q Table Stabilization By Episode (Episodes 11-End)',size=25) -# plt.xlabel('Episode Number',size=20) -# plt.ylabel('Mean Squared Difference Between Current & Last QTable',size=14) -# x_axis = np.array([i+11 for i in range(len(errs_new))]) -# plt.plot(x_axis,errs_new) - - -# return q, actions_history, returns_since_entry + #def trainqlearner(start_date, end_date, ticker,alpha=0.01, epsilon=0.2, epsilon_decay = .99995, gamma=0.95, episodes=500,commission=0,sell_penalty=0): -#def trainqlearner(train_data,start_date, end_date, ticker, q, gamma=0.95, episodes=200, sh = 50): -def trainqlearner(start_date, end_date, ticker, gamma=0.95, episodes=200, sh = 50): +def trainqlearner(ticker, start_date, end_date, window, gamma, episodes, sh): # Split the data into train and test data set train_df = d.get_stock_data(ticker, start_date, end_date) @@ -497,42 +289,36 @@ def trainqlearner(start_date, end_date, ticker, gamma=0.95, episodes=200, sh = 5 all_actions = {0: 'hold', 1: 'buy', 2: 'sell'} # create_df = normalized predictors norm_bb_width, norm_adj_close, norm_close_sma_ratio - print("START create_df") - train_df = d.create_df(train_df, 3) - print("END create_df") + train_df = d.create_df(train_df, window) # get_states = States Dictionary after discretizing by converting continuous values to integer state percent_b_states_values, close_sma_ratio_states_value = d.get_states( train_df) + # Create_state_df = Add state information to the DF train_df = d.create_state_df( train_df, percent_b_states_values, close_sma_ratio_states_value) + #train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value) - # New cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles() # Return a list of strings representing the combination of all the states all_states = d.get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values) # all_states = d.get_all_states(None, percent_b_states_values, close_sma_ratio_states_value) + states_size = len(all_states) # Preparation of the Q Table - print("START q_init") - q_init = initialize_q_mat(all_states, all_actions)/1e9 - print("END q_init") - print(q_init) - - train_data = np.array(train_df[['norm_adj_close', 'state']]) + + q = initialize_q_mat(all_states, all_actions)/1e5 - print("START train_q_learning") - q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q_init, gamma=0.95, episodes=1, sh = 50) - print("END train_q_learning") + train_data = np.array(train_df[['Adj Close', 'state']]) + + - # Specify quantiles - BB_quantiles = percent_b_states_values - SMA_ratio_quantiles = close_sma_ratio_states_value - cash_quantiles, holdings_quantiles = d.create_cash_and_holdings_quantiles() + q, train_actions_history, train_returns_since_entry = train_q_learning(train_data, q, cash_states_values, shares_states_values, gamma, episodes, sh) + - return q, percent_b_states_values, SMA_ratio_quantiles, cash_quantiles, holdings_quantiles + return q, train_actions_history, train_returns_since_entry, percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values