diff --git a/src/data_process.py b/src/data_process.py
index 0a9f31f..a0269a0 100644
--- a/src/data_process.py
+++ b/src/data_process.py
@@ -108,6 +108,7 @@ def get_stock_data(symbol, start, end):
     df = pd.read_csv('train.csv',index_col='Date')
     df.index=pd.to_datetime(df.index)
     '''
+    # df.to_csv("../data/current.csv")
     return df


diff --git a/src/dqn/.gitignore b/src/dqn/.gitignore
new file mode 100644
index 0000000..ca64f85
--- /dev/null
+++ b/src/dqn/.gitignore
@@ -0,0 +1,6 @@
+.git
+__pycache__
+.ipynb_checkpoints
+
+models
+runs
\ No newline at end of file
diff --git a/src/dqn/README.md b/src/dqn/README.md
new file mode 100644
index 0000000..b57ef6f
--- /dev/null
+++ b/src/dqn/README.md
@@ -0,0 +1,30 @@
+# Q-Trader
+
+**Use at your own risk.**
+
+PyTorch implementation of [q-trader](https://github.com/edwardhdlu/q-trader).
+
+## Results
+
+Some examples of results on test sets:
+
+![HSI2018](images/%5EHSI_2018.png)
+Starting Capital: $100,000.
+HSI, 2017-2018. Profit of $10702.13.
+
+## Running the Code
+
+To train the model, download training and test CSV files from [Yahoo! Finance](https://ca.finance.yahoo.com/quote/%5EGSPC/history?p=%5EGSPC) into `data/`:
+```
+mkdir models
+python train.py ^GSPC 10 1000
+```
+
+Then, when training finishes (a minimum of 200 episodes is recommended for usable results):
+```
+jupyter notebook -> visualize.ipynb
+```
+
+## References
+
+[Deep Q-Learning with Keras and Gym](https://keon.io/deep-q-learning/) - Q-learning overview and Agent skeleton code
diff --git a/src/dqn/agent/__init__.py b/src/dqn/agent/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/dqn/agent/agent.py b/src/dqn/agent/agent.py
new file mode 100644
index 0000000..81f4952
--- /dev/null
+++ b/src/dqn/agent/agent.py
@@ -0,0 +1,88 @@
+from agent.memory import Transition, ReplayMemory
+from agent.model import DQN
+
+import numpy as np
+import random
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+import os
+
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = torch.device("cpu")
+# print(device)
+
+class Agent:
+    def __init__(self, state_size, is_eval=False):
+        self.state_size = state_size  # normalized previous days
+        self.action_size = 3  # sit, buy, sell
+        self.memory = ReplayMemory(10000)
+        self.inventory = []
+        self.is_eval = is_eval
+
+        self.gamma = 0.95
+        self.epsilon = 1.0
+        self.epsilon_min = 0.01
+        self.epsilon_decay = 0.99995
+        self.batch_size = 32
+        if os.path.exists('models/target_model'):
+            self.policy_net = torch.load('models/policy_model', map_location=device)
+            self.target_net = torch.load('models/target_model', map_location=device)
+        else:
+            self.policy_net = DQN(state_size, self.action_size)
+            self.target_net = DQN(state_size, self.action_size)
+        self.optimizer = optim.RMSprop(self.policy_net.parameters(), lr=0.005, momentum=0.9)
+
+    def act(self, state):
+        if not self.is_eval and np.random.rand() <= self.epsilon:
+            return random.randrange(self.action_size)
+
+        tensor = torch.FloatTensor(state).to(device)
+        options = self.target_net(tensor)
+        return np.argmax(options[0].detach().numpy())
+
+    def optimize(self):
+        if len(self.memory) < self.batch_size:
+            return
+        transitions = self.memory.sample(self.batch_size)
+        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
+        # detailed explanation). This converts batch-array of Transitions
+        # to Transition of batch-arrays.
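+        # For example, [Transition(s1, a1, ns1, r1), Transition(s2, a2, ns2, r2)]
+        # becomes Transition(state=(s1, s2), action=(a1, a2),
+        #                    next_state=(ns1, ns2), reward=(r1, r2)).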
+ batch = Transition(*zip(*transitions)) + + # Compute a mask of non-final states and concatenate the batch elements + # (a final state would've been the one after which simulation ended) + next_state = torch.FloatTensor(batch.next_state).to(device) + non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, next_state))) + non_final_next_states = torch.cat([s for s in next_state if s is not None]) + state_batch = torch.FloatTensor(batch.state).to(device) + action_batch = torch.LongTensor(batch.action).to(device) + reward_batch = torch.FloatTensor(batch.reward).to(device) + + # Compute Q(s_t, a) - the model computes Q(s_t), then we select the + # columns of actions taken. These are the actions which would've been taken + # for each batch state according to policy_net + + # print(state_batch.shape, action_batch.shape, self.batch_size) + state_action_values = self.policy_net(state_batch).reshape((self.batch_size, 3)).gather(1, action_batch.reshape((self.batch_size, 1))) + + # Compute V(s_{t+1}) for all next states. + # Expected values of actions for non_final_next_states are computed based + # on the "older" target_net; selecting their best reward with max(1)[0]. + # This is merged based on the mask, such that we'll have either the expected + # state value or 0 in case the state was final. + next_state_values = torch.zeros(self.batch_size, device=device) + next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0].detach() + # Compute the expected Q values + expected_state_action_values = (next_state_values * self.gamma) + reward_batch + + # Compute Huber loss + loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1)) + + # Optimize the model + self.optimizer.zero_grad() + loss.backward() + for param in self.policy_net.parameters(): + param.grad.data.clamp_(-1, 1) + self.optimizer.step() \ No newline at end of file diff --git a/src/dqn/agent/memory.py b/src/dqn/agent/memory.py new file mode 100644 index 0000000..dc4da08 --- /dev/null +++ b/src/dqn/agent/memory.py @@ -0,0 +1,26 @@ +import random +from collections import namedtuple + +Transition = namedtuple('Transition', + ('state', 'action', 'next_state', 'reward')) + + +class ReplayMemory(object): + + def __init__(self, capacity): + self.capacity = capacity + self.memory = [] + self.position = 0 + + def push(self, *args): + """Saves a transition.""" + if len(self.memory) < self.capacity: + self.memory.append(None) + self.memory[self.position] = Transition(*args) + self.position = (self.position + 1) % self.capacity + + def sample(self, batch_size): + return random.sample(self.memory, batch_size) + + def __len__(self): + return len(self.memory) diff --git a/src/dqn/agent/model.py b/src/dqn/agent/model.py new file mode 100644 index 0000000..e0c5d5f --- /dev/null +++ b/src/dqn/agent/model.py @@ -0,0 +1,18 @@ +import torch +import torch.nn as nn + +class DQN(nn.Module): + def __init__(self, state_size, action_size): + super(DQN, self).__init__() + self.main = nn.Sequential( + nn.Linear(state_size, 64), + nn.LeakyReLU(0.01, inplace=True), + nn.Linear(64, 32), + nn.LeakyReLU(0.01, inplace=True), + nn.Linear(32, 8), + nn.LeakyReLU(0.01, inplace=True), + nn.Linear(8, action_size), + ) + + def forward(self, input): + return self.main(input) diff --git a/src/dqn/evaluate.py b/src/dqn/evaluate.py new file mode 100644 index 0000000..7e6969c --- /dev/null +++ b/src/dqn/evaluate.py @@ -0,0 +1,164 @@ +import torch +import numpy as np +import matplotlib +matplotlib.use('TkAgg') +import 
matplotlib.pyplot as plt +import numpy as np + +from agent.agent import Agent +from functions import * +import pandas as pd + +# stock_name = '^HSI_2018' +window_size = 5 + +''' +agent = Agent(window_size, True) +data, adj_close = getStockDataVec(stock_name) +l = len(data) - 1 +batch_size = 32 +episode_count = 1000 +''' +action_map = {0:"HOLD", 1: "BUY", 2:"SELL"} +def DQN(stock_table=None, money= None, inc= None, original_shares= None, commission= None): + + window_size=3 + cash, num_shares = 100_000, 0 + sh = 50 + agent = Agent(window_size) + data, adj_close, date = getStockDataVec(path = "../data/test_dqn_data.csv") + l = len(adj_close) - 1 + batch_size = 32 + episode_count = 5 + + + + closes = [] + buys = [] + sells = [] + + + final_vals, actions, shares, cashes, dates = [], [], [], [], [] + + episode_count=1 + for e in range(episode_count): + closes = [] + buys = [] + sells = [] + + + state = getState(data, 0, window_size + 1) + total_profit = 0 + agent.inventory = [] + + + # capital = 100000 + for t in range(l): + #action = agent.act(state) + action = np.random.randint(0, 3) + closes.append(data[t]) + + # sit + next_state = getState(data, t + 1, window_size + 1) + reward = 0 + ''' + if action == 1: # buy + if capital > adj_close[t]: + agent.inventory.append(adj_close[t]) + buys.append(adj_close[t]) + sells.append(None) + capital -= adj_close[t] + else: + buys.append(None) + sells.append(None) + + elif action == 2: # sell + if len(agent.inventory) > 0: + bought_price = agent.inventory.pop(0) + reward = max(adj_close[t] - bought_price, 0) + total_profit += adj_close[t] - bought_price + buys.append(None) + sells.append(adj_close[t]) + capital += adj_close[t] + else: + buys.append(None) + sells.append(None) + elif action == 0: + buys.append(None) + sells.append(None) + ''' + + next_adj_close = adj_close[t+1] + current_adj_close = adj_close[t] + + # get reward + if action == 0: # hold + if num_shares > 0: + next_cash = cash # no change + reward = (cash + num_shares * next_adj_close) - (cash + num_shares * current_adj_close) + else: + reward = 0 + + if action == 1: # buy + if cash > sh * current_adj_close: + next_cash = cash - sh * current_adj_close + # reward = (cash - current_adj_close + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares + sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares += sh + cash = next_cash + else: + reward = 0 + + if action == 2: # sell + if num_shares > 0: + next_cash = cash + sh * current_adj_close + # reward = (cash + current_adj_close + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares - sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares -= sh + cash = next_cash + else: + reward = 0 + + + + done = True if t == l - 1 else False + agent.memory.push(state, action, next_state, reward) + state = next_state + + ''' + if done: + print("--------------------------------") + print(" Total Profit: " + formatPrice(total_profit)) + print(" Total Shares: ", ) + print("--------------------------------") + ''' + if done: + print("--------------------------------") + print("Total Profit: " + formatPrice(total_profit)) + print("Total Reward: ", reward) + print("Total shares: ", num_shares) + print("Total cash: ", cash) + print("--------------------------------") + + + cur_cash, cur_shares = cash, num_shares + final_vals.append(cur_cash + (cur_shares * adj_close[t])) + cashes.append(cur_cash) + 
actions.append(action_map[action]) + shares.append(num_shares) + dates.append(date[t]) + + cashes = pd.Series(cashes,index = pd.to_datetime(dates)) + shares = pd.Series(shares,index = pd.to_datetime(dates)) + actions = pd.Series(actions,index = pd.to_datetime(dates)) + final_vals = pd.Series(final_vals,index = pd.to_datetime(dates)) + + results = {'final_vals': final_vals, 'actions': actions, 'shares': shares, 'cash': cashes} + + + return results + +if __name__=="__main__": + res = DQN() + dic = pd.DataFrame(res) + print(dic) \ No newline at end of file diff --git a/src/dqn/functions.py b/src/dqn/functions.py new file mode 100644 index 0000000..39126bc --- /dev/null +++ b/src/dqn/functions.py @@ -0,0 +1,23 @@ +import numpy as np +import math + +# prints formatted price +def formatPrice(n): + return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n)) + +def getStockDataVec(key=None,path=None): + vec, states, date = [], [], [] + lines = open(path, "r").read().splitlines() + for line in lines[5:]: + row = line.split(",") + close = row[6] + if close != 'null': + date.append(row[0]) + vec.append(float(row[6])) + states.append(list(map(float, row[11:13]))) + + return states, vec, date + +# returns an an n-day state representation ending at time t +def getState(data, t, n): + return np.array([data[t]]) diff --git a/src/dqn/train.py b/src/dqn/train.py new file mode 100644 index 0000000..d8cf726 --- /dev/null +++ b/src/dqn/train.py @@ -0,0 +1,119 @@ +from agent.agent import Agent +from agent.memory import Transition, ReplayMemory +from functions import * +import sys +import torch + + +device = torch.device("cpu") + +# if torch.cuda.is_available(): +# device = torch.device("cuda:0") # you can continue going on here, like cuda:1 cuda:2....etc. +# print("Running on the GPU") +# else: +# device = torch.device("cpu") +# print("Running on the CPU") + +# if len(sys.argv) != 4: +# print("Usage: python train.py [stock] [window] [episodes]") +# exit() + +# stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3]) + +window_size, episode_count = 2, 100 +cash, num_shares, sh = 100_000, 0, 50 +state_size = 2 +agent = Agent(state_size) +data, adj_close, date = getStockDataVec(path="../../data/train_dqn_data.csv") +l = len(adj_close) - 1 + + +for e in range(episode_count + 1): + print("Episode " + str(e) + "/" + str(episode_count)) + state = getState(data, 0, window_size + 1) + + total_profit = 0 + agent.inventory = [] + + for t in range(l): + + action = agent.act(state) + next_state = getState(data, t + 1, window_size + 1) + reward = 0 + + ''' + # sit + if num_shares>0: + reward = (adj_close[t+1] - adj_close[t]) * num_shares + else: + reward = 0 + + if action == 1: # buy + # agent.inventory.append(data[t]) + agent.inventory.append(adj_close[t]) + num_shares+=1 + reward = 0 + # print("Buy: " + formatPrice(data[t])) + + elif action == 2 and len(agent.inventory) > 0: # sell + bought_price = agent.inventory.pop(0) + # reward = max(data[t] - bought_price, 0) + # total_profit += data[t] - bought_price + # print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price)) + + reward = (adj_close[t]-bought_price)*num_shares + num_shares-=1 + total_profit += adj_close[t] - bought_price + ''' + + next_adj_close = adj_close[t+1] + current_adj_close = adj_close[t] + + + # get reward + if action == 0: # hold + if num_shares > 0: + next_cash = cash # no change + reward = (cash + num_shares * next_adj_close) - (cash + num_shares * current_adj_close) + else: + reward = 0 
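+            # The buy/sell branches below, like the hold branch above, score the
+            # one-step change in mark-to-market portfolio value:
+            # (cash_after + shares_after * next_adj_close)
+            #   - (cash_before + shares_before * current_adj_close).
+            # Trades that cannot be afforded, or sells with no shares held, leave reward = 0.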
+ + if action == 1: # buy + if cash > sh * current_adj_close: + next_cash = cash - sh * current_adj_close + # reward = (cash - current_adj_close + ((num_shares+1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares + sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares += sh + cash = next_cash + else: + reward = 0 + + if action == 2: # sell + if num_shares > 0: + next_cash = cash + sh * current_adj_close + # reward = (cash + current_adj_close + ((num_shares-1)*next_adj_close)) - (cash + num_shares*current_adj_close) + reward = (next_cash + ((num_shares - sh) * next_adj_close)) - (cash + num_shares * current_adj_close) + num_shares -= sh + cash = next_cash + else: + reward = 0 + + + done = True if t == l - 1 else False + agent.memory.push(state, action, next_state, reward) + state = next_state + + if done: + print("--------------------------------") + print("Total Profit: " + formatPrice(total_profit)) + print("Total Reward: ", reward) + print("Total shares: ", num_shares) + print("Total cash: ", cash) + print("--------------------------------") + + agent.optimize() + + if e % 10 == 0: + agent.target_net.load_state_dict(agent.policy_net.state_dict()) + torch.save(agent.policy_net, "models/policy_model") + torch.save(agent.target_net, "models/target_model") \ No newline at end of file diff --git a/src/sim.py b/src/sim.py index 0bfcd42..284fbef 100644 --- a/src/sim.py +++ b/src/sim.py @@ -13,6 +13,11 @@ import time import pandas_datareader.data as web # fetch stock data +import sys +# sys.path.insert(1, "C:/Users/amogh/Appledore/Fall-2020/Capstone/q-trading-pytorch/") +from evaluate import DQN + + #TODO: make this a single function call ticker = 'JPM' @@ -20,6 +25,12 @@ start = '2017-01-01' end = '2019-12-31' +dqn_data = d.get_stock_data(ticker,start,end) +dqn_data.to_csv("../data/test_dqn_data.csv") + +dqn_result = DQN() + + start_date = dt.datetime(2007, 1, 1) end_date = dt.datetime(2016, 12, 31) @@ -294,7 +305,6 @@ def rule_based(stock_table,money,inc, original_shares,commission): # normalize markov markov = markov.divide(markov.sum(axis=1),axis=0).round(2) - results = {'final_vals':final_vals,'actions':actions,'shares':shares,'cash':cash,'qtable':None, 'markov':markov, 'state_history': None} return results @@ -586,7 +596,7 @@ def qlearner(stock_table,money,inc, original_shares, commission, q_table = nq, t temp['shares'] = curr_shares temp['shares_state'] = curr_shares_s temp['state'] = final_states - temp.to_csv('./data/viz_data.csv') + temp.to_csv('../data/viz_data.csv') actions = pd.Series(act_list, index=stock_table.index) @@ -608,7 +618,7 @@ def return_stats(stock='jpm', money=100000, #inc=10,- can read this argument and change code below if doing absolute share-based #original_shares=100, - can read this argument and change code below if doing absolute share-based - policies=[hold,random_action,rule_based,ols,buy_always,qlearner]): + policies=[hold,random_action,rule_based,ols,buy_always,qlearner,enact_dqn]): ''' Enacts every strategy and provides summary statistics and graphs @@ -655,6 +665,9 @@ def return_stats(stock='jpm', original_shares = original_shares, commission = commission) for policy in policies} + + # results['DQN'] = dqn_result + actions_history = results['qlearner']['actions_history'] days, prices, actions = [], [], [] @@ -683,15 +696,9 @@ def return_stats(stock='jpm', holds.to_csv('./data/hold_data.csv') - - - - - - # plot qtables only for qlearner (or any other strategies with Q table) - for policy in 
policies: - if results[policy.__name__]['qtable'] is not None: #don't try to plot Q tables for benchmark strategies + for policy in policies[:-1]: + if 'qtable' in results[policy.__name__] and results[policy.__name__]['qtable'] is not None: #don't try to plot Q tables for benchmark strategies # get state history and quantile length and qtable for normalization and averaging function state_history = results[policy.__name__]['state_history'] @@ -714,7 +721,7 @@ def return_stats(stock='jpm', plt.gca().tick_params(axis='y',bottom=False,left=False) plt.show(fig) - # marginalize over SMA + # marginalize over SMA # TODO - determine if this mean was taken correctly quantile_length = len(results[policy.__name__]['SMA_quantiles']) qtab_sma = weighted_average_and_normalize(qtab, state_history, 0, quantile_length) @@ -766,24 +773,26 @@ def return_stats(stock='jpm', # get markov transition models - for policy in policies: - plt.figure(figsize=(6,3)) - plt.title('Transition Matrix For '+policy.__name__,size=16) - mkv = results[policy.__name__]['markov'] - fig = heatmap(mkv,annot=True,annot_kws={'size':14},cmap='Greens',cbar=False) - plt.xticks(fontsize=14) - plt.yticks(fontsize=14,rotation=0) - plt.gca().set(xlabel='Current Trading Day', ylabel='Last Trading Day') - plt.gca().tick_params(axis='x',bottom=False,left=False) - plt.gca().tick_params(axis='y',bottom=False,left=False) - plt.gca().hlines([1,2],xmin=0,xmax=10,linewidth=10,color='white') - plt.show(fig) + for policy in policies[:-1]: + if 'markov' in results[policy.__name__]: + plt.figure(figsize=(6,3)) + plt.title('Transition Matrix For '+policy.__name__,size=16) + mkv = results[policy.__name__]['markov'] + fig = heatmap(mkv,annot=True,annot_kws={'size':14},cmap='Greens',cbar=False) + plt.xticks(fontsize=14) + plt.yticks(fontsize=14,rotation=0) + plt.gca().set(xlabel='Current Trading Day', ylabel='Last Trading Day') + plt.gca().tick_params(axis='x',bottom=False,left=False) + plt.gca().tick_params(axis='y',bottom=False,left=False) + plt.gca().hlines([1,2],xmin=0,xmax=10,linewidth=10,color='white') + plt.show(fig) # plot daily portfolio values plt.figure(figsize=(14,8)) - for policy in policies: + for policy in policies[-1:]: plt.plot(results[policy.__name__]['final_vals'],label = policy.__name__) + # plt.plot(results['DQN']['final_vals'], label = 'DQN') plt.legend() plt.xlabel("Date",fontsize=20) plt.ylabel("Portfolio Value ($)",fontsize=20) @@ -793,7 +802,10 @@ def return_stats(stock='jpm', # plot daily cash values plt.figure(figsize=(14,8)) for policy in policies: + print(results[policy.__name__]['cash']) plt.plot(results[policy.__name__]['cash'],label = policy.__name__) + + # plt.plot(results['DQN']['cash'], label='DQN') plt.legend() plt.xlabel("Date",fontsize=20) plt.ylabel("Cash Held ($)",fontsize=20) @@ -803,7 +815,10 @@ def return_stats(stock='jpm', # plot daily shares plt.figure(figsize=(14,8)) for policy in policies: + # print(results[policy.__name__]['shares']) plt.plot(results[policy.__name__]['shares'],label = policy.__name__) + + # plt.plot(results['DQN']['shares'], label='DQN') plt.legend() plt.xlabel("Date",fontsize=20) plt.ylabel("Shares Held",fontsize=20) @@ -811,15 +826,16 @@ def return_stats(stock='jpm', plt.show() # plot daily portfolio values - for i, policy in enumerate(policies): + for i, policy in enumerate(policies[-1:]): dic = results[policy.__name__] - if dic['state_history'] is not None: + if 'state_history' in dic and dic['state_history'] is not None: print("States History for " + policy.__name__ + "is: ", 
dic['state_history']) - del dic['state_history'] - del dic['qtable'] - del dic['markov'] + try: + del dic['state_history'] + del dic['qtable'] + del dic['markov'] del dic['BB_quantiles'] del dic['SMA_quantiles'] del dic['CASH_quantiles'] @@ -845,6 +861,7 @@ def return_stats(stock='jpm', plt.legend() plt.show() + # display percentages #TODO: display(res) has no display() function. Fix bug. for policy in policies: @@ -895,6 +912,7 @@ def return_stats(stock='jpm', print('Standard deviation of daily return under',nm,'for',stock_name+':',round(np.std(rets[policy],axis=0),3)) # information ratio of daily return + print(len(rets[policy].values), len(bls)) checkhist(rets[policy].values,bls) pr = np.mean(rets[policy].values) br = np.mean(bls) @@ -966,9 +984,11 @@ def return_stats(stock='jpm', print('\n') print('\n') - # TODO: add any additional desired visualizations + # TODO: add any additional desired visualizati ons + # plt.show() plt.show() + if __name__ == '__main__': stocks = ticker diff --git a/src/trainqlearner_util.py b/src/trainqlearner_util.py index ec0ecf1..d447271 100644 --- a/src/trainqlearner_util.py +++ b/src/trainqlearner_util.py @@ -295,14 +295,14 @@ def trainqlearner(ticker, start_date, end_date, window, gamma, episodes, sh): percent_b_states_values, close_sma_ratio_states_value = d.get_states( train_df) + #train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value) + cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles() # Create_state_df = Add state information to the DF train_df = d.create_state_df( train_df, percent_b_states_values, close_sma_ratio_states_value) - #train_df = d.create_state_df(train_df, None, percent_b_states_values, close_sma_ratio_states_value) - - cash_states_values, shares_states_values = d.create_cash_and_holdings_quantiles() + train_df.to_csv("../data/train_dqn_data.csv") # Return a list of strings representing the combination of all the states all_states = d.get_all_states(percent_b_states_values, close_sma_ratio_states_value, cash_states_values, shares_states_values)
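A minimal inference sketch (not part of the patch) showing how the policy network saved by `train.py` could be loaded and queried for greedy actions. It assumes training has already produced `models/policy_model`, that `agent.model` is importable so `torch.load` can unpickle the `DQN` instance (newer PyTorch versions may additionally need `weights_only=False`), and that inputs have the same width as the training states (`state_size = 2` in `train.py`); the feature values below are placeholders.

```
import torch

from agent.model import DQN  # class must be importable for torch.load to unpickle the model

device = torch.device("cpu")
policy_net = torch.load("models/policy_model", map_location=device)
policy_net.eval()

action_map = {0: "HOLD", 1: "BUY", 2: "SELL"}

def greedy_action(state_features):
    # state_features: list of floats matching the network's input width (2 here)
    tensor = torch.FloatTensor([state_features]).to(device)
    with torch.no_grad():
        q_values = policy_net(tensor)  # shape (1, 3): one Q-value per action
    return action_map[int(q_values.argmax(dim=1).item())]

# placeholder two-feature state
print(greedy_action([0.5, 1.01]))
```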