Skip to content

Commit

Permalink
#55 Added 2 new states (cash, holdings) removed mrdr, included always…
Browse files Browse the repository at this point in the history
… buy benchmark, and new reward definition
  • Loading branch information
mariemayadi committed Dec 5, 2020
1 parent bf641c0 commit 34d0626
Show file tree
Hide file tree
Showing 4 changed files with 592 additions and 251 deletions.
94 changes: 35 additions & 59 deletions src/data_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,43 +141,6 @@ def get_adj_close_sma_ratio(values, window):
ratio = values/rm
return ratio.apply(lambda x: round(x, 5))

def get_mrdr(values,baseline,test=True):
    '''
    Returns the market relative daily return: the stock's returns divided
    by the baseline's returns over the same index.
    INPUTS:
    values(pandas series): stock values
    baseline(pandas series): market baseline values; only rows whose index is
        <= the last index of the stock data are used
    test(bool): whether this is testing period (and only the past few days' data is needed) -
        when True only the last 3 rows of each series are used, which speeds up testing
    OUTPUTS:
    market relative daily return(series)
    RAISES:
    ValueError if the aligned baseline index does not equal the stock index
    '''
    if test:
        # testing only needs the most recent rows
        valnew = values.iloc[-3:]
        mx = valnew.index.max()
        gspc_temp = baseline[baseline.index <= mx].iloc[-3:]
    else:
        valnew = values
        mx = valnew.index.max()
        gspc_temp = baseline[baseline.index <= mx]

    # align the baseline to the stock's index, carrying the last known
    # baseline value forward over gaps (.ffill() replaces the deprecated
    # fillna(method='ffill') spelling and behaves the same)
    gspc = gspc_temp.reindex(valnew.index).ffill()

    # helper defined elsewhere in this module
    # NOTE(review): presumably validates the two histories - confirm
    checkhist(gspc,valnew)

    if not gspc.index.equals(valnew.index):
        raise ValueError('Stock indecies do not match')

    gspc_rets = returns(gspc)
    stock_rets = returns(valnew)

    # A second, identical index check used to follow the division; it could
    # never fire because the check above has already raised in that case,
    # so it was removed.
    return stock_rets / gspc_rets


def discretize(values, num_states=4):
'''
Expand All @@ -198,6 +161,26 @@ def discretize(values, num_states=4):
states_value[num_states] = float('inf')
return states_value

def create_cash_and_holdings_quantiles(max_cash=180000, max_shares=252, num_states=9):
    '''
    Build evenly spaced threshold tables for the cash state (State 3) and the
    holdings / number-of-shares state (State 4).
    Inputs:
    max_cash(number): value of the highest finite cash threshold (default 180000)
    max_shares(number): value of the highest finite shares threshold (default 252)
    num_states(int): number of finite thresholds per table (default 9)
    Output:
    cash_states_values(dict), shares_states_values(dict): each maps
        0..num_states-1 to int(maximum/num_states) * (index + 1) and
        num_states to float("inf")
    Raises:
    ValueError if num_states is smaller than 1
    '''
    if num_states < 1:
        raise ValueError('num_states must be at least 1')

    def _even_thresholds(upper):
        # step is truncated to an int first, exactly like int(180000/9)
        step = int(upper / num_states)
        thresholds = {i: step * (i + 1) for i in range(num_states)}
        # open-ended top bucket catches everything above the last threshold
        thresholds[num_states] = float("inf")
        return thresholds

    cash_states_values = _even_thresholds(max_cash)
    shares_states_values = _even_thresholds(max_shares)

    return cash_states_values, shares_states_values

def value_to_state(value, states_value):
'''
Expand Down Expand Up @@ -232,15 +215,12 @@ def create_df(df, window=45):
close_sma_ratio = get_adj_close_sma_ratio(df['Adj Close'], window)
# get the upper and lower BB values
upper, lower = get_upper_lower_bands(df['Adj Close'], window)
# get mrdr
baseline = read_stock('^GSPC','2007-01-01','2016-12-31')
mrdr = get_mrdr(df['Adj Close'],baseline,test=False)

# create bb measure, close-sma-ratio columns
df['close_sma_ratio'] = close_sma_ratio
df['upper_bb'] = upper
df['lower_bb'] = lower
df['mrdr'] = mrdr

# drop missing values
df.dropna(inplace=True)
Expand Down Expand Up @@ -276,13 +256,11 @@ def get_states(df):
}

close_sma_ratio_states_value = discretize(df['norm_close_sma_ratio'])

mrdr_value = discretize(df['mrdr'])

return percent_b_states_values, close_sma_ratio_states_value, mrdr_value
return percent_b_states_values, close_sma_ratio_states_value


def create_state_df(df, percent_b_states_values, close_sma_ratio_states_value,mrdr_value):
def create_state_df(df, bb_states_value, close_sma_ratio_states_value):
'''
Add a new column to hold the state information to the dataframe
Inputs:
Expand All @@ -293,20 +271,17 @@ def create_state_df(df, percent_b_states_values, close_sma_ratio_states_value,mr
Output:
df(dataframe)
'''
df['percent_b_state'] = df['percent_b'].apply(
lambda x: value_to_state(x, percent_b_states_values))
df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(
lambda x: value_to_state(x, close_sma_ratio_states_value))
df['mrdr_state'] = df['mrdr'].apply(
lambda x: value_to_state(x, mrdr_value))

df['state'] = df['norm_close_sma_ratio_state'] + df['percent_b_state'] + df['mrdr_state']

#df['norm_bb_width_state'] = df['norm_bb_width'].apply(lambda x : value_to_state(x, bb_states_value)) #2
df['norm_close_sma_ratio_state'] = df['norm_close_sma_ratio'].apply(lambda x : value_to_state(x, close_sma_ratio_states_value))
df['percent_b_state'] = df['percent_b'].apply(lambda x : value_to_state(x, percent_b_states_values))
#df['norm_adj_close_state'] = df['norm_adj_close'].apply(lambda x : value_to_state(x, price_states_value))

#df['state'] = df['norm_close_sma_ratio_state'] + df['norm_bb_width_state']
df['state'] = df['norm_close_sma_ratio_state'] + df['percent_b_state']
df.dropna(inplace=True)
return df


def get_all_states(percent_b_states_values, close_sma_ratio_states_value,mrdr_value):
def get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values):
'''
Combine all the states from the discretized
norm_adj_close, norm_close_sma_ratio columns.
Expand All @@ -320,10 +295,11 @@ def get_all_states(percent_b_states_values, close_sma_ratio_states_value,mrdr_va
states = []
for c, _ in close_sma_ratio_states_value.items():
for b, _ in percent_b_states_values.items():
for m, _ in mrdr_value.items():
state = str(c) + str(b) + str(m)
states.append(str(state))

for m, _ in cash_states_values.items():
for s, _ in shares_states_values.items():
state = str(c) + str(b) + str(m) + str(s)
states.append(str(state))

return states

def weighted_average_and_normalize(qtable,state_history,state_num,quantile_length):
Expand Down
113 changes: 104 additions & 9 deletions src/sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#TODO: make this a single function call
ticker = 'JPM'

np.random.seed(0)
np.random.seed(1000)
start = '2017-01-01'
end = '2019-12-31'

Expand All @@ -26,10 +26,14 @@
q, bb_states_value, SMA_ratio_quantiles, MRDR_values = tu.trainqlearner(start_date, end_date, ticker)
q.columns = ['HOLD', 'BUY', 'SELL']
bb_ = list(bb_states_value.values())

print(bb_)

sma_ = list(SMA_ratio_quantiles.values())
mrdr_ = list(MRDR_values.values())

# Fixing the range problem

# q.iloc[0] = q.iloc[0] * 1e-16
#nq = (q - q.mean()) / q.std()
nq=q
Expand Down Expand Up @@ -297,6 +301,90 @@ def rule_based(stock_table,money,inc, original_shares,commission):
return results


# function to buy stock every day
def buy_always(stock_table,money,inc,original_shares,commission):
    '''
    enacts buy_always strategy: buy `inc` shares every day for as long as
    the cash on hand covers their price, otherwise hold
    Inputs
        stock_table: Pandas series of daily stock or portfolio values
        money: original cash held
        inc: increment of buy/sell permitted
        original_shares: original number of shares held
        commission: flat fee deducted from cash on every BUY
    Output
        results: dictionary holding...
            *one Pandas series each (key/Series names are identical) for*
            final_vals: final daily values of portfolio
            actions: daily actions taken ("BUY" or "HOLD" for this strategy)
            shares: daily number of shares of stock held
            cash: daily amount of cash held
            *additionally*
            qtable: returns None (does not apply to this strategy)
            markov: row-normalized (rounded to 2 places) table of transitions
                    between consecutive daily actions
            state_history: returns None (does not apply to this strategy)
    '''

    # record original value
    original_val = money + (stock_table.values[0]*original_shares) # initial cash

    # create shares table
    shares = stock_table.copy()
    shares.iloc[0] = original_shares

    # create markov transition matrix (action_list is defined at module level)
    markov = pd.DataFrame(np.zeros((3,3)),index=action_list,columns=action_list)

    # create cash table
    cash = stock_table.copy()
    cash.iloc[0] = money

    # calculate daily portfolio value
    final_vals = stock_table.copy()
    final_vals.iloc[0] = original_val

    # create actions table - nothing is traded on the first day
    actions = ['HOLD']

    # iterate through days
    for i in range(1,stock_table.shape[0]):
        j = i-1 # last day
        cur_cash = cash.values[j] # current cash
        cur_shares = shares.values[j] # current shares
        cur_price = stock_table.values[j] # trades execute at the previous day's price

        # end of day portfolio value: yesterday's position at today's price
        final_vals.iloc[i] = cur_cash + (cur_shares*stock_table.values[i])

        # NOTE(review): the affordability test ignores commission, so cash can
        # end up below zero by up to `commission`; the qlearner strategy uses
        # the same test, so it is left unchanged here - confirm intent.
        if cur_cash < (cur_price*inc):
            # if you can't buy, hold
            act = 'HOLD'
            cash.iloc[i] = cur_cash
            shares.iloc[i] = cur_shares
        else:
            # else buy
            act = 'BUY'
            cash.iloc[i] = cur_cash - (inc*cur_price) - commission
            shares.iloc[i] = cur_shares + inc

        actions.append(act)

        # increment markov
        markov.loc[actions[j],actions[i]] +=1

    actions = pd.Series(actions,index=stock_table.index)

    # normalize markov; rows for actions that never occurred sum to zero and
    # therefore come out as NaN
    markov = markov.divide(markov.sum(axis=1),axis=0).round(2)

    results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov,'state_history':None}
    return results

# function to choose action based on OLS of returns looking back to trading days t-6 to t-1
def ols(stock_table,money,inc, original_shares,commission):
'''
Expand Down Expand Up @@ -412,7 +500,7 @@ def ols(stock_table,money,inc, original_shares,commission):
return results

# def qlearner(stock_table,money,inc, original_shares,qtable=ql[0], BB_quantiles=ql[1], SMA_quantiles=ql[2],window=window):
def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, MRDR_quantiles=mrdr_, window=3): # defining defaults here prevents need for args to be passed in return_stats function
def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_quantiles= bb_ , SMA_quantiles = sma_, MRDR_quantiles=mrdr_, window=5): # defining defaults here prevents need for args to be passed in return_stats function
'''
Enacts qlearning
Expand All @@ -439,6 +527,8 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
'''

# record original value
print(stock_table[1])

original_val = money + (stock_table.values[0]*original_shares) # initial cash

# generate table of returns
Expand Down Expand Up @@ -479,14 +569,19 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
if i > window: # if we have enough of a lookback window to calculate stats

# find yesterday's final bollinger band value
bb = d.get_bollinger_bands(stock_table.iloc[:i],window).iloc[j]
upper, lower = d.get_upper_lower_bands(stock_table.iloc[:i], window)
bb = ((stock_table.iloc[:i] - lower) * 100 / (upper - lower)).iloc[j]


#bb = d.get_bollinger_bands(stock_table.iloc[:i],window).iloc[j]

# find yesterday's final bollinger band quantile
if bb != float('inf'):
bbq = np.argwhere(np.where(BB_quantiles>bb,1,0))[0][0]
else:
bbq = len(BB_quantiles) - 1


# find current SMA value
sma = d.get_adj_close_sma_ratio(stock_table.iloc[:i],window).iloc[j]

Expand All @@ -506,7 +601,7 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
mrq = len(MRDR_quantiles) - 1

# find state based on these two pieces of information
state = str(bbq) + str(smq) + str(mrq)
state = str(smq) + str(bbq) + str(mrq)

# locate *optimal* action from Q table, which we will then examine to see if it's possible
# print("STATE: ", state, str(bbq), str(smq))
Expand All @@ -532,7 +627,7 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu

# if you can't buy or sell, hold
if cur_shares < inc and cur_cash < (cur_price*inc):
cur_act = 'HOLD'
act = 'HOLD'

# if you can't sell, but you can buy... buy if it makes sense, or hold if it doesn't
elif cur_shares < inc:
Expand Down Expand Up @@ -572,12 +667,12 @@ def qlearner(stock_table,money,inc, original_shares, commission,qtable=nq, BB_qu
return results

# function to return stats and graphs
def return_stats(stock='jpm',
def return_stats(stock='aapl',
commission = 2,
money=100000,
#inc=10,- can read this argument and change code below if doing absolute share-based
#original_shares=100, - can read this argument and change code below if doing absolute share-based
policies=[hold,random_action,rule_based,ols,qlearner]):
policies=[hold,random_action,rule_based,ols,buy_always,qlearner]):

'''
Enacts every strategy and provides summary statistics and graphs
Expand Down Expand Up @@ -628,7 +723,7 @@ def return_stats(stock='jpm',
quantile_length = len(results[policy.__name__]['BB_quantiles'])
qtab = results[policy.__name__]['qtable']

qtab_bb = weighted_average_and_normalize(qtab, state_history, 0, quantile_length)
qtab_bb = weighted_average_and_normalize(qtab, state_history, 1, quantile_length)
qtab_bb = qtab_bb.iloc[::-1] # reverse order of rows for visualization purposes - now biggest value will be on top
qtab_bb.index = np.round(np.flip(np.array(results[policy.__name__]['BB_quantiles'])),5) # define index as bb quantiles, reversing quantile order in kind so biggest value is first

Expand All @@ -646,7 +741,7 @@ def return_stats(stock='jpm',

# marginalize over SMA
# TODO - determine if this mean was taken correctly
qtab_sma = weighted_average_and_normalize(qtab, state_history, 1, quantile_length)
qtab_sma = weighted_average_and_normalize(qtab, state_history, 0, quantile_length)
qtab_sma = qtab_sma.iloc[::-1]
qtab_sma.index = np.round(np.flip(np.array(results[policy.__name__]['SMA_quantiles'])),5)

Expand Down
Loading

0 comments on commit 34d0626

Please sign in to comment.