-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlstm_eval.py
207 lines (183 loc) · 7.42 KB
/
lstm_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
"""
Evaluation functions for the LSTM.
@author: Riley Smith
Created: 9/17/2023
"""
import csv
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
def preprocess_sequence(seq, window=30):
    """
    Normalize a sequence by the last row of its initial window.

    Every row of ``seq`` is divided, column by column, by row ``window - 1``
    (or by the final row when ``seq`` has fewer than ``window`` rows), so the
    reference row becomes all ones.

    Parameters
    ----------
    seq : ndarray
        2-D array with one row per time step and one column per feature.
    window : int
        Length of the initial window whose last row is used as the
        normalization reference. Defaults to 30, the value that used to be
        hard-coded here.

    Returns
    -------
    ndarray
        A new array with the same shape as ``seq``. The input is not modified.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    IndexError
        If ``seq`` has no rows.
    """
    if window < 1:
        raise ValueError(f'window must be at least 1, got {window}')
    seq = np.asarray(seq)
    # The last row of the initial window is the per-feature reference value
    norm = seq[:window][-1]
    # A zero in the reference row yields inf/nan here (NumPy only warns)
    return seq / norm[np.newaxis, :]
def autoregressive_eval(model, data, supervise_every=1, use_volume=True):
    """
    An evaluation function to see how network predictions stack up over time.

    This one is specifically for the autoregressive model looking one day in
    the future. The data is normalized with ``preprocess_sequence``, the first
    30 rows seed the input window, and 30 one-step predictions are then made.
    Each model output is treated as a price change and added to the last price
    in the current window.

    Parameters
    ----------
    model : tf.keras.Model
        The trained network being evaluated. Its prediction for a single
        window must contain exactly one value.
    data : ndarray
        60 days worth of data for one stock, price in column 0. Shape (60, 2)
        with volume in column 1 when ``use_volume`` is True, shape (60, 1)
        otherwise.
    supervise_every : int
        Number of days to run auto-regressive predictions before correcting
        with true price.
    use_volume : bool
        Whether or not volume is a feature for predictions.

    Returns
    -------
    predicted_prices : ndarray
        The 30 normalized seed prices followed by the 30 predicted prices.
    actual_prices : ndarray
        Column 0 of the normalized data.
    """
    data = preprocess_sequence(data)
    sequence = data[:30]

    # Iteratively make predictions
    actual_prices = data[:, 0]
    predicted_prices = data[:30, 0].tolist()
    for i in range(30):
        raw_output = model.predict(sequence[np.newaxis, :, :], verbose=0)[0]
        # .item() pulls out the single value explicitly; calling float() on a
        # size-1 array with ndim > 0 is deprecated since NumPy 1.25.
        predicted_change = float(np.asarray(raw_output).item())
        predicted_price = sequence[-1, 0] + predicted_change
        predicted_prices.append(predicted_price)

        if (i + 1) % supervise_every == 0:
            # Use true price and volume for next part of sequence
            sequence = data[i + 1: i + 31]
        elif use_volume:
            # No true volume is available for a predicted day, so the last
            # volume in the window is carried forward.
            new_data = np.array([[predicted_price, sequence[-1, 1]]])
            sequence = np.concatenate([sequence[1:], new_data], axis=0)
        else:
            # Slide the single-feature window forward by one predicted price
            new_data = np.array([[predicted_price]])
            sequence = np.concatenate([sequence[1:], new_data], axis=0)

    return np.array(predicted_prices), actual_prices
class AutoregressiveEvalCallback(tf.keras.callbacks.Callback):
    """
    A custom callback for testing the network as a trader.

    At the end of every epoch it runs ``autoregressive_eval`` on the last 60
    rows of each ticker's CSV file for several supervision intervals and
    saves one plot per (ticker, interval) pair into ``out_folder``.
    """

    def __init__(self, out_folder, use_volume=True, tickers=None, **kwargs):
        """
        Parameters
        ----------
        out_folder : str or Path
            Folder that receives the plots. Created, including missing
            parents, if it does not exist.
        use_volume : bool
            Whether or not volume is a feature for predictions.
        tickers : iterable of str, optional
            Ticker symbols to evaluate. When omitted, the module-level
            ``VALIDATION_STOCKS`` is used.
        **kwargs
            Forwarded to ``tf.keras.callbacks.Callback``.
        """
        super().__init__(**kwargs)
        # Make sure the folder exists (parents=True allows nested folders)
        Path(out_folder).mkdir(parents=True, exist_ok=True)
        self.out_folder = out_folder
        self.use_volume = use_volume
        self.tickers = tickers

    def on_epoch_end(self, epoch, logs=None):
        """Run the evaluation on each stock and save the resulting plots."""
        # NOTE(review): VALIDATION_STOCKS is not defined or imported in the
        # visible part of this file; pass `tickers` explicitly or make sure
        # the global exists before training.
        tickers = VALIDATION_STOCKS if self.tickers is None else self.tickers
        cols = ['Adj Close', 'Volume'] if self.use_volume else ['Adj Close']
        for ticker in tickers:
            data = pd.read_csv(str(Path('data', f'{ticker}.csv')), usecols=cols)
            # usecols does not guarantee column order, so select explicitly
            # to keep the price in column 0.
            data = data[cols].to_numpy()[-60:]
            for supervise_interval in [1, 2, 10, 30]:
                predicted, true = autoregressive_eval(self.model, data,
                                                      supervise_interval, self.use_volume)
                # Plot it
                saveas = str(Path(self.out_folder, f'EPOCH{epoch:03}_{ticker}_{supervise_interval:02}.png'))
                fig, ax = plt.subplots()
                ax.plot(predicted, color='blue', linestyle='--', label='Predicted close')
                ax.plot(true, color='black', label='True close')
                ax.set_title(f'Network predictions for {ticker}\nSupervision every {supervise_interval} days')
                ax.legend(loc='lower right')
                # Save and close this specific figure rather than whatever
                # pyplot currently considers the active one.
                fig.savefig(saveas)
                plt.close(fig)
def categorical_eval(model, ds, num_classes=5):
    """
    An evaluation function for the categorical stock prediction problem.

    Take batches of data, predict the class (category corresponding to stock
    movement) as the argmax of the model output, and summarize the true
    labels grouped by predicted class.

    Parameters
    ----------
    model : tf.keras.Model
        The neural network being evaluated.
    ds : tf.data.Dataset
        Tensorflow Dataset object serving up preprocessed batches and
        corresponding labels. Any iterable of ``(batch, label)`` pairs whose
        labels expose ``.numpy()`` works.
    num_classes : int
        Number of predicted classes to report on. Defaults to 5, the value
        that used to be hard-coded here.

    Returns
    -------
    dict
        Maps each class index in ``range(num_classes)`` to a dict with keys
        ``'av'`` and ``'std'``: the mean and standard deviation of the labels
        of the samples predicted as that class, or the string ``'NA'`` for
        both when no sample was predicted as that class.
    """
    all_preds = []
    all_labels = []
    for batch, label in ds:
        # Get predictions for this batch; verbose=0 avoids one progress bar
        # per batch in the training log.
        preds = model.predict(batch, verbose=0).argmax(axis=1)
        all_preds.append(preds)
        all_labels.append(label.numpy())

    # Concatenate results. np.concatenate rejects an empty list, so an empty
    # dataset is mapped to empty arrays and reported as 'NA' for every class.
    if all_preds:
        all_preds = np.concatenate(all_preds, axis=0)
        all_labels = np.concatenate(all_labels, axis=0)
    else:
        all_preds = np.empty(0, dtype=int)
        all_labels = np.empty(0, dtype=float)

    # Aggregate results by class
    aggregate_results = {}
    for class_idx in range(num_classes):
        returns = all_labels[all_preds == class_idx]
        if returns.size == 0:
            d = {'av': 'NA', 'std': 'NA'}
        else:
            d = {
                'av': returns.mean(),
                'std': returns.std(),
            }
        aggregate_results[class_idx] = d

    print('\n\n\nAGGREGATE RESULTS: ', aggregate_results, '\n\n\n')
    return aggregate_results
class CategoricalEvalCallback(tf.keras.callbacks.Callback):
    """Keras callback that logs per-class evaluation results to a CSV file"""

    def __init__(self, ds, outfile, **kwargs):
        """
        Parameters
        ----------
        ds : tf.data.Dataset
            The Tensorflow Dataset object on which to run evaluation.
        outfile : str
            Path of the CSV log. The file is (re)created here and a header
            row is written to it.
        **kwargs
            Forwarded to ``tf.keras.callbacks.Callback``.
        """
        super().__init__(**kwargs)
        self.ds = ds
        self.outfile = outfile

        # Header: the epoch column followed by a mean/std pair per class
        header = ['Epoch']
        for class_idx in range(5):
            header.extend((f'CLASS{class_idx}_mean', f'CLASS{class_idx}_std'))

        with open(self.outfile, 'w+', newline='') as csvfile:
            csv.writer(csvfile).writerow(header)

    def on_epoch_end(self, epoch, logs=None):
        """Evaluate the model on the dataset and append one row to the CSV"""
        stats_by_class = categorical_eval(self.model, self.ds)

        # Flatten the per-class dicts in the order the evaluation returned them
        record = [epoch]
        for class_stats in stats_by_class.values():
            record += [class_stats['av'], class_stats['std']]

        with open(self.outfile, 'a', newline='') as csvfile:
            csv.writer(csvfile).writerow(record)
def plot_eval_log(eval_log, title=None, saveas=None):
    """
    Take the evaluation log from training and make a plot of average returns
    over time for each class, with a shaded band of one standard deviation
    on either side of the mean.

    Parameters
    ----------
    eval_log : str or Path
        CSV file with columns ``CLASS{i}_mean`` and ``CLASS{i}_std`` for
        ``i`` in 0..4 and one row per epoch.
    title : str, optional
        Title for the figure.
    saveas : str or Path, optional
        If given, the figure is saved there and closed. Otherwise it is
        shown interactively.
    """
    # Load evaluation log
    log = pd.read_csv(eval_log)

    # Build a figure and set some colors
    fig, ax = plt.subplots(figsize=(8, 5))
    colors = ['dodgerblue', 'orange', 'lightcoral', 'aquamarine', 'mediumorchid']

    # Plot the series
    x = np.arange(log.shape[0]) + 1
    for class_idx, color in enumerate(colors):
        # Grab data for this class. 'NA' entries (classes with no predictions
        # in an epoch) must become NaN. read_csv's default NA handling usually
        # does that already; to_numeric covers the case where the column is
        # still text, without comparing a float array against a string.
        mean_over_time = pd.to_numeric(
            log[f'CLASS{class_idx}_mean'], errors='coerce').to_numpy(dtype=float) * 100
        std_over_time = pd.to_numeric(
            log[f'CLASS{class_idx}_std'], errors='coerce').to_numpy(dtype=float) * 100

        # Plot it
        ax.plot(x, mean_over_time, label=f'Class {class_idx} mean returns', color=color)
        ax.fill_between(x, mean_over_time - std_over_time, mean_over_time + std_over_time,
                        color=color, alpha=0.2)

    # Format axes and title
    ax.legend()
    ax.set_xlabel('Epoch', fontsize=14)
    ax.set_ylabel('Return (%)', fontsize=14)
    if title is not None:
        ax.set_title(title, fontsize=16)

    # Optionally save the figure
    if saveas is not None:
        fig.savefig(saveas)
        plt.close(fig)
    else:
        plt.show()