diff --git a/src/data_process.py b/src/data_process.py
index 0a9f31f..a0269a0 100644
--- a/src/data_process.py
+++ b/src/data_process.py
@@ -108,6 +108,7 @@ def get_stock_data(symbol, start, end):
     df = pd.read_csv('train.csv',index_col='Date')
     df.index=pd.to_datetime(df.index)
     '''
+    # df.to_csv("../data/current.csv")
     return df


diff --git a/src/dqn/.gitignore b/src/dqn/.gitignore
new file mode 100644
index 0000000..ca64f85
--- /dev/null
+++ b/src/dqn/.gitignore
@@ -0,0 +1,6 @@
+.git
+__pycache__
+.ipynb_checkpoints
+
+models
+runs
\ No newline at end of file
diff --git a/src/dqn/README.md b/src/dqn/README.md
new file mode 100644
index 0000000..b57ef6f
--- /dev/null
+++ b/src/dqn/README.md
@@ -0,0 +1,30 @@
+# Q-Trader
+
+**Use at your own risk.**
+
+PyTorch implementation of [q-trader](https://github.com/edwardhdlu/q-trader).
+
+## Results
+
+Some examples of results on test sets:
+
+![HSI2018](images/%5EHSI_2018.png)
+Starting Capital: $100,000.
+HSI, 2017-2018. Profit of $10702.13.
+
+## Running the Code
+
+To train the model, download training and test CSV files from [Yahoo! Finance](https://ca.finance.yahoo.com/quote/%5EGSPC/history?p=%5EGSPC) into `data/`:
+```
+mkdir models
+python train.py ^GSPC 10 1000
+```
+
+Then, when training finishes (a minimum of 200 episodes is recommended for usable results):
+```
+jupyter notebook -> visualize.ipynb
+```
+
+## References
+
+[Deep Q-Learning with Keras and Gym](https://keon.io/deep-q-learning/) - Q-learning overview and Agent skeleton code
diff --git a/src/dqn/agent/__init__.py b/src/dqn/agent/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/dqn/agent/agent.py b/src/dqn/agent/agent.py
new file mode 100644
index 0000000..81f4952
--- /dev/null
+++ b/src/dqn/agent/agent.py
@@ -0,0 +1,88 @@
+from agent.memory import Transition, ReplayMemory
+from agent.model import DQN
+
+import numpy as np
+import random
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+import os
+
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = torch.device("cpu")
+# print(device)
+
+class Agent:
+    def __init__(self, state_size, is_eval=False):
+        self.state_size = state_size  # normalized previous days
+        self.action_size = 3  # sit, buy, sell
+        self.memory = ReplayMemory(10000)
+        self.inventory = []
+        self.is_eval = is_eval
+
+        self.gamma = 0.95
+        self.epsilon = 1.0
+        self.epsilon_min = 0.01
+        self.epsilon_decay = 0.99995
+        self.batch_size = 32
+        if os.path.exists('models/target_model'):
+            self.policy_net = torch.load('models/policy_model', map_location=device)
+            self.target_net = torch.load('models/target_model', map_location=device)
+        else:
+            self.policy_net = DQN(state_size, self.action_size)
+            self.target_net = DQN(state_size, self.action_size)
+        self.optimizer = optim.RMSprop(self.policy_net.parameters(), lr=0.005, momentum=0.9)
+
+    def act(self, state):
+        if not self.is_eval and np.random.rand() <= self.epsilon:
+            return random.randrange(self.action_size)
+
+        tensor = torch.FloatTensor(state).to(device)
+        options = self.target_net(tensor)
+        return np.argmax(options[0].detach().numpy())
+
+    def optimize(self):
+        if len(self.memory) < self.batch_size:
+            return
+        transitions = self.memory.sample(self.batch_size)
+        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
+        # detailed explanation). This converts batch-array of Transitions
+        # to Transition of batch-arrays.
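+        # For example, [Transition(s1, a1, ns1, r1), Transition(s2, a2, ns2, r2)]
+        # becomes Transition(state=(s1, s2), action=(a1, a2),
+        #                    next_state=(ns1, ns2), reward=(r1, r2)).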
+ batch = Transition(*zip(*transitions)) + + # Compute a mask of non-final states and concatenate the batch elements + # (a final state would've been the one after which simulation ended) + next_state = torch.FloatTensor(batch.next_state).to(device) + non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, next_state))) + non_final_next_states = torch.cat([s for s in next_state if s is not None]) + state_batch = torch.FloatTensor(batch.state).to(device) + action_batch = torch.LongTensor(batch.action).to(device) + reward_batch = torch.FloatTensor(batch.reward).to(device) + + # Compute Q(s_t, a) - the model computes Q(s_t), then we select the + # columns of actions taken. These are the actions which would've been taken + # for each batch state according to policy_net + + # print(state_batch.shape, action_batch.shape, self.batch_size) + state_action_values = self.policy_net(state_batch).reshape((self.batch_size, 3)).gather(1, action_batch.reshape((self.batch_size, 1))) + + # Compute V(s_{t+1}) for all next states. + # Expected values of actions for non_final_next_states are computed based + # on the "older" target_net; selecting their best reward with max(1)[0]. + # This is merged based on the mask, such that we'll have either the expected + # state value or 0 in case the state was final. + next_state_values = torch.zeros(self.batch_size, device=device) + next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0].detach() + # Compute the expected Q values + expected_state_action_values = (next_state_values * self.gamma) + reward_batch + + # Compute Huber loss + loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1)) + + # Optimize the model + self.optimizer.zero_grad() + loss.backward() + for param in self.policy_net.parameters(): + param.grad.data.clamp_(-1, 1) + self.optimizer.step() \ No newline at end of file diff --git a/src/dqn/agent/memory.py b/src/dqn/agent/memory.py new file mode 100644 index 0000000..dc4da08 --- /dev/null +++ b/src/dqn/agent/memory.py @@ -0,0 +1,26 @@ +import random +from collections import namedtuple + +Transition = namedtuple('Transition', + ('state', 'action', 'next_state', 'reward')) + + +class ReplayMemory(object): + + def __init__(self, capacity): + self.capacity = capacity + self.memory = [] + self.position = 0 + + def push(self, *args): + """Saves a transition.""" + if len(self.memory) < self.capacity: + self.memory.append(None) + self.memory[self.position] = Transition(*args) + self.position = (self.position + 1) % self.capacity + + def sample(self, batch_size): + return random.sample(self.memory, batch_size) + + def __len__(self): + return len(self.memory) diff --git a/src/dqn/agent/model.py b/src/dqn/agent/model.py new file mode 100644 index 0000000..e0c5d5f --- /dev/null +++ b/src/dqn/agent/model.py @@ -0,0 +1,18 @@ +import torch +import torch.nn as nn + +class DQN(nn.Module): + def __init__(self, state_size, action_size): + super(DQN, self).__init__() + self.main = nn.Sequential( + nn.Linear(state_size, 64), + nn.LeakyReLU(0.01, inplace=True), + nn.Linear(64, 32), + nn.LeakyReLU(0.01, inplace=True), + nn.Linear(32, 8), + nn.LeakyReLU(0.01, inplace=True), + nn.Linear(8, action_size), + ) + + def forward(self, input): + return self.main(input) diff --git a/src/dqn/evaluate.py b/src/dqn/evaluate.py new file mode 100644 index 0000000..7e6969c --- /dev/null +++ b/src/dqn/evaluate.py @@ -0,0 +1,164 @@ +import torch +import numpy as np +import matplotlib +matplotlib.use('TkAgg') +import 
matplotlib.pyplot as plt +import numpy as np + +from agent.agent import Agent +from functions import * +import pandas as pd + +# stock_name = '^HSI_2018' +window_size = 5 + +''' +agent = Agent(window_size, True) +data, adj_close = getStockDataVec(stock_name) +l = len(data) - 1 +batch_size = 32 +episode_count = 1000 +''' +action_map = {0:"HOLD", 1: "BUY", 2:"SELL"} +def DQN(stock_table=None, money= None, inc= None, original_shares= None, commission= None): + + window_size=3 + cash, num_shares = 100_000, 0 + sh = 50 + agent = Agent(window_size) + data, adj_close, date = getStockDataVec(path = "../data/test_dqn_data.csv") + l = len(adj_close) - 1 + batch_size = 32 + episode_count = 5 + + + + closes = [] + buys = [] + sells = [] + + + final_vals, actions, shares, cashes, dates = [], [], [], [], [] + + episode_count=1 + for e in range(episode_count): + closes = [] + buys = [] + sells = [] + + + state = getState(data, 0, window_size + 1) + total_profit = 0 + agent.inventory = [] + + + # capital = 100000 + for t in range(l): + #action = agent.act(state) + action = np.random.randint(0, 3) + closes.append(data[t]) + + # sit + next_state = getState(data, t + 1, window_size + 1) + reward = 0 + ''' + if action == 1: # buy + if capital > adj_close[t]: + agent.inventory.append(adj_close[t]) + buys.append(adj_close[t]) + sells.append(None) + capital -= adj_close[t] + else: + buys.append(None) + sells.append(None) + + elif action == 2: # sell + if len(agent.inventory) > 0: + bought_price = agent.inventory.pop(0) + reward = max(adj_close[t] - bought_price, 0) + total_profit += adj_close[t] - bought_price + buys.append(None) + sells.append(adj_close[t]) + capital += adj_close[t] + else: + buys.append(None) + sells.append(None) + elif action == 0: + buys.append(None) + sells.append(None) + ''' + + next_adj_close = adj_close[t+1] + current_adj_close = adj_close[t] + + # get reward + if action == 0: # hold + if num_shares > 0: + next_cash = cash # no change + reward = (cash + num_shares * next_adj_close) - (cash + num_shares * current_adj_close) + else: + reward = 0 + + if action == 1: # buy + if cash > sh * current_adj_close: + next_cash = cash - sh * current_adj_close + # reward = (cash - current_adj_close + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares + sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares += sh + cash = next_cash + else: + reward = 0 + + if action == 2: # sell + if num_shares > 0: + next_cash = cash + sh * current_adj_close + # reward = (cash + current_adj_close + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares - sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares -= sh + cash = next_cash + else: + reward = 0 + + + + done = True if t == l - 1 else False + agent.memory.push(state, action, next_state, reward) + state = next_state + + ''' + if done: + print("--------------------------------") + print(" Total Profit: " + formatPrice(total_profit)) + print(" Total Shares: ", ) + print("--------------------------------") + ''' + if done: + print("--------------------------------") + print("Total Profit: " + formatPrice(total_profit)) + print("Total Reward: ", reward) + print("Total shares: ", num_shares) + print("Total cash: ", cash) + print("--------------------------------") + + + cur_cash, cur_shares = cash, num_shares + final_vals.append(cur_cash + (cur_shares * adj_close[t])) + cashes.append(cur_cash) + 
actions.append(action_map[action]) + shares.append(num_shares) + dates.append(date[t]) + + cashes = pd.Series(cashes,index = pd.to_datetime(dates)) + shares = pd.Series(shares,index = pd.to_datetime(dates)) + actions = pd.Series(actions,index = pd.to_datetime(dates)) + final_vals = pd.Series(final_vals,index = pd.to_datetime(dates)) + + results = {'final_vals': final_vals, 'actions': actions, 'shares': shares, 'cash': cashes} + + + return results + +if __name__=="__main__": + res = DQN() + dic = pd.DataFrame(res) + print(dic) \ No newline at end of file diff --git a/src/dqn/functions.py b/src/dqn/functions.py new file mode 100644 index 0000000..39126bc --- /dev/null +++ b/src/dqn/functions.py @@ -0,0 +1,23 @@ +import numpy as np +import math + +# prints formatted price +def formatPrice(n): + return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n)) + +def getStockDataVec(key=None,path=None): + vec, states, date = [], [], [] + lines = open(path, "r").read().splitlines() + for line in lines[5:]: + row = line.split(",") + close = row[6] + if close != 'null': + date.append(row[0]) + vec.append(float(row[6])) + states.append(list(map(float, row[11:13]))) + + return states, vec, date + +# returns an an n-day state representation ending at time t +def getState(data, t, n): + return np.array([data[t]]) diff --git a/src/dqn/train.py b/src/dqn/train.py new file mode 100644 index 0000000..d8cf726 --- /dev/null +++ b/src/dqn/train.py @@ -0,0 +1,119 @@ +from agent.agent import Agent +from agent.memory import Transition, ReplayMemory +from functions import * +import sys +import torch + + +device = torch.device("cpu") + +# if torch.cuda.is_available(): +# device = torch.device("cuda:0") # you can continue going on here, like cuda:1 cuda:2....etc. +# print("Running on the GPU") +# else: +# device = torch.device("cpu") +# print("Running on the CPU") + +# if len(sys.argv) != 4: +# print("Usage: python train.py [stock] [window] [episodes]") +# exit() + +# stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]) + +window_size, episode_count = 2, 100 +cash, num_shares, sh = 100_000, 0, 50 +state_size = 2 +agent = Agent(state_size) +data, adj_close, date = getStockDataVec(path="../../data/train_dqn_data.csv") +l = len(adj_close) - 1 + + +for e in range(episode_count + 1): + print("Episode " + str(e) + "/" + str(episode_count)) + state = getState(data, 0, window_size + 1) + + total_profit = 0 + agent.inventory = [] + + for t in range(l): + + action = agent.act(state) + next_state = getState(data, t + 1, window_size + 1) + reward = 0 + + ''' + # sit + if num_shares>0: + reward = (adj_close[t+1] - adj_close[t]) * num_shares + else: + reward = 0 + + if action == 1: # buy + # agent.inventory.append(data[t]) + agent.inventory.append(adj_close[t]) + num_shares+=1 + reward = 0 + # print("Buy: " + formatPrice(data[t])) + + elif action == 2 and len(agent.inventory) > 0: # sell + bought_price = agent.inventory.pop(0) + # reward = max(data[t] - bought_price, 0) + # total_profit += data[t] - bought_price + # print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price)) + + reward = (adj_close[t]-bought_price)*num_shares + num_shares-=1 + total_profit += adj_close[t] - bought_price + ''' + + next_adj_close = adj_close[t+1] + current_adj_close = adj_close[t] + + + # get reward + if action == 0: # hold + if num_shares > 0: + next_cash = cash # no change + reward = (cash + num_shares * next_adj_close) - (cash + num_shares * current_adj_close) + else: + reward = 0 
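+            # The buy/sell branches below, like the hold branch above, score the
+            # one-step change in mark-to-market portfolio value:
+            # (cash_after + shares_after * next_adj_close)
+            #   - (cash_before + shares_before * current_adj_close).
+            # Trades that cannot be afforded, or sells with no shares held, leave reward = 0.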
+ + if action == 1: # buy + if cash > sh * current_adj_close: + next_cash = cash - sh * current_adj_close + # reward = (cash - current_adj_close + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares + sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares += sh + cash = next_cash + else: + reward = 0 + + if action == 2: # sell + if num_shares > 0: + next_cash = cash + sh * current_adj_close + # reward = (cash + current_adj_close + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares - sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares -= sh + cash = next_cash + else: + reward = 0 + + + done = True if t == l - 1 else False + agent.memory.push(state, action, next_state, reward) + state = next_state + + if done: + print("--------------------------------") + print("Total Profit: " + formatPrice(total_profit)) + print("Total Reward: ", reward) + print("Total shares: ", num_shares) + print("Total cash: ", cash) + print("--------------------------------") + + agent.optimize() + + if e % 10 == 0: + agent.target_net.load_state_dict(agent.policy_net.state_dict()) + torch.save(agent.policy_net, "models/policy_model") + torch.save(agent.target_net, "models/target_model") \ No newline at end of file diff --git a/src/sim.py b/src/sim.py index 0bfcd42..284fbef 100644 --- a/src/sim.py +++ b/src/sim.py @@ -13,6 +13,11 @@ import time import pandas_datareader.data as web # fetch stock data +import sys +# sys.path.insert(1, "C:/Users/amogh/Appledore/Fall-2020/Capstone/q-trading-pytorch/") +from evaluate import DQN + + #TODO: make this a single function call ticker = 'JPM' @@ -20,6 +25,12 @@ start = '2017-01-01' end = '2019-12-31' +dqn_data = d.get_stock_data(ticker,start,end) +dqn_data.to_csv("../data/test_dqn_data.csv") + +dqn_result = DQN() + + start_date = dt.datetime(2007, 1, 1) end_date = dt.datetime(2016, 12, 31) @@ -294,7 +305,6 @@ def rule_based(stock_table,money,inc, original_shares,commission): # normalize markov markov = markov.divide(markov.sum(axis=1),axis=0).round(2) - results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov, 'state_history': None} return results @@ -586,7 +596,7 @@ def qlearner(stock_table,money,inc, original_shares, commission, q_table = nq, t temp['shares'] = curr_shares temp['shares_state'] = curr_shares_s temp['state'] = final_states - temp.to_csv('./data/viz_data.csv') + temp.to_csv('../data/viz_data.csv') actions = pd.Series(act_list, index=stock_table.index) @@ -608,7 +618,7 @@ def return_stats(stock='jpm', money=100000, #inc=10,- can read this argument and change code below if doing absolute share-based #original_shares=100, - can read this argument and change code below if doing absolute share-based - policies=[hold,random_action,rule_based,ols,buy_always,qlearner]): + policies=[hold,random_action,rule_based,ols,buy_always,qlearner,enact_dqn]): ''' Enacts every strategy and provides summary statistics and graphs @@ -655,6 +665,9 @@ def return_stats(stock='jpm', original_shares = original_shares, commission = commission) for policy in policies} + + # results['DQN'] = dqn_result + actions_history = results['qlearner']['actions_history'] days, prices, actions = [], [], [] @@ -683,15 +696,9 @@ def return_stats(stock='jpm', holds.to_csv('./data/hold_data.csv') - - - - - - # plot qtables only for qlearner (or any other strategies with Q table) - for policy in 
policies: - if results[policy.__name__]['qtable'] is not None: #don't try to plot Q tables for benchmark strategies + for policy in policies[:-1]: + if 'qtable' in results[policy.__name__] and results[policy.__name__]['qtable'] is not None: #don't try to plot Q tables for benchmark strategies # get state history and quantile length and qtable for normalization and averaging function state_history = results[policy.__name__]['state_history'] @@ -714,7 +721,7 @@ def return_stats(stock='jpm', plt.gca().tick_params(axis='y',bottom=False,left=False) plt.show(fig) - # marginalize over SMA + # marginalize over SMA # TODO - determine if this mean was taken correctly quantile_length = len(results[policy.__name__]['SMA_quantiles']) qtab_sma = weighted_average_and_normalize(qtab, state_history, 0, quantile_length) @@ -766,24 +773,26 @@ def return_stats(stock='jpm', # get markov transition models - for policy in policies: - plt.figure(figsize=(6,3)) - plt.title('Transition Matrix For '+policy.__name__,size=16) - mkv = results[policy.__name__]['markov'] - fig = heatmap(mkv,annot=True,annot_kws={'size':14},cmap='Greens',cbar=False) - plt.xticks(fontsize=14) - plt.yticks(fontsize=14,rotation=0) - plt.gca().set(xlabel='Current Trading Day', ylabel='Last Trading Day') - plt.gca().tick_params(axis='x',bottom=False,left=False) - plt.gca().tick_params(axis='y',bottom=False,left=False) - plt.gca().hlines([1,2],xmin=0,xmax=10,linewidth=10,color='white') - plt.show(fig) + for policy in policies[:-1]: + if 'markov' in results[policy.__name__]: + plt.figure(figsize=(6,3)) + plt.title('Transition Matrix For '+policy.__name__,size=16) + mkv = results[policy.__name__]['markov'] + fig = heatmap(mkv,annot=True,annot_kws={'size':14},cmap='Greens',cbar=False) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14,rotation=0) + plt.gca().set(xlabel='Current Trading Day', ylabel='Last Trading Day') + plt.gca().tick_params(axis='x',bottom=False,left=False) + plt.gca().tick_params(axis='y',bottom=False,left=False) + plt.gca().hlines([1,2],xmin=0,xmax=10,linewidth=10,color='white') + plt.show(fig) # plot daily portfolio values plt.figure(figsize=(14,8)) - for policy in policies: + for policy in policies[-1:]: plt.plot(results[policy.__name__]['final_vals'],label = policy.__name__) + # plt.plot(results['DQN']['final_vals'], label = 'DQN') plt.legend() plt.xlabel("Date",fontsize=20) plt.ylabel("Portfolio Value ($)",fontsize=20) @@ -793,7 +802,10 @@ def return_stats(stock='jpm', # plot daily cash values plt.figure(figsize=(14,8)) for policy in policies: + print(results[policy.__name__]['cash']) plt.plot(results[policy.__name__]['cash'],label = policy.__name__) + + # plt.plot(results['DQN']['cash'], label='DQN') plt.legend() plt.xlabel("Date",fontsize=20) plt.ylabel("Cash Held ($)",fontsize=20) @@ -803,7 +815,10 @@ def return_stats(stock='jpm', # plot daily shares plt.figure(figsize=(14,8)) for policy in policies: + # print(results[policy.__name__]['shares']) plt.plot(results[policy.__name__]['shares'],label = policy.__name__) + + # plt.plot(results['DQN']['shares'], label='DQN') plt.legend() plt.xlabel("Date",fontsize=20) plt.ylabel("Shares Held",fontsize=20) @@ -811,15 +826,16 @@ def return_stats(stock='jpm', plt.show() # plot daily portfolio values - for i, policy in enumerate(policies): + for i, policy in enumerate(policies[-1:]): dic = results[policy.__name__] - if dic['state_history'] is not None: + if 'state_history' in dic and dic['state_history'] is not None: print("States History for " + policy.__name__ + "is: ", 
dic['state_history']) - del dic['state_history'] - del dic['qtable'] - del dic['markov'] + try: + del dic['state_history'] + del dic['qtable'] + del dic['markov'] del dic['BB_quantiles'] del dic['SMA_quantiles'] del dic['CASH_quantiles'] @@ -845,6 +861,7 @@ def return_stats(stock='jpm', plt.legend() plt.show() + # display percentages #TODO: display(res) has no display() function. Fix bug. for policy in policies: @@ -895,6 +912,7 @@ def return_stats(stock='jpm', print('Standard deviation of daily return under',nm,'for',stock_name+':',round(np.std(rets[policy],axis=0),3)) # information ratio of daily return + print(len(rets[policy].values), len(bls)) checkhist(rets[policy].values,bls) pr = np.mean(rets[policy].values) br = np.mean(bls) @@ -966,9 +984,11 @@ def return_stats(stock='jpm', print('\n') print('\n') - # TODO: add any additional desired visualizations + # TODO: add any additional desired visualizati ons + # plt.show() plt.show() + if __name__ == '__main__': stocks = ticker diff --git a/src/trainqlearner_util.py b/src/trainqlearner_util.py index ec0ecf1..d447271 100644 --- a/src/trainqlearner_util.py +++ b/src/trainqlearner_util.py @@ -295,14 +295,14 @@ def trainqlearner(ticker, start_date, end_date, window, gamma, episodes, sh): percent_b_states_values, close_sma_ratio_states_value = d.get_states( train_df) + #train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value) + cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles() # Create_state_df = Add state information to the DF train_df = d.create_state_df( train_df, percent_b_states_values, close_sma_ratio_states_value) - #train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value) - - cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles() + train_df.to_csv("../data/train_dqn_data.csv") # Return a list of strings representing the combination of all the states all_states = d.get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
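A minimal inference sketch (not part of the patch) showing how the policy network saved by `train.py` could be loaded and queried for greedy actions. It assumes training has already produced `models/policy_model`, that `agent.model` is importable so `torch.load` can unpickle the `DQN` instance (newer PyTorch versions may additionally need `weights_only=False`), and that inputs have the same width as the training states (`state_size = 2` in `train.py`); the feature values below are placeholders.

```
import torch

from agent.model import DQN  # class must be importable for torch.load to unpickle the model

device = torch.device("cpu")
policy_net = torch.load("models/policy_model", map_location=device)
policy_net.eval()

action_map = {0: "HOLD", 1: "BUY", 2: "SELL"}

def greedy_action(state_features):
    # state_features: list of floats matching the network's input width (2 here)
    tensor = torch.FloatTensor([state_features]).to(device)
    with torch.no_grad():
        q_values = policy_net(tensor)  # shape (1, 3): one Q-value per action
    return action_map[int(q_values.argmax(dim=1).item())]

# placeholder two-feature state
print(greedy_action([0.5, 1.01]))
```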