-
Notifications
You must be signed in to change notification settings - Fork 129
/
preprocess.py
109 lines (97 loc) · 4.82 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
Functions to preprocess residual time series into time series for use with models.
Add new preprocessing functions here.
"""
import logging
import numpy as np
import torch
# TODO: These should be refactored into a subclass of torch.utils.data.DataLoader or torch.utils.data.Dataset
def preprocess_cumsum(data, lookback):
    """
    Turn a TxN numpy ndarray `data` into rolling cumulative-sum windows.

    For every window start `s` in `range(T - lookback)` the rows
    `data[s:s + lookback]` are summed cumulatively along time, so each window
    begins at its own first value rather than at the running total of the
    whole series. Intended for residual returns, yielding residual cumulative
    returns.

    Returns a tuple `(windows, idxs_selected)`:
      * `windows`: float32 ndarray of shape (T - lookback, N, lookback);
        columns that are not selected for a window are left as zeros.
      * `idxs_selected`: boolean torch tensor of shape (T - lookback, N),
        True where the column has no exact-zero entry inside the window
        (a zero is treated as a missing return).
    """
    n_steps, n_assets = data.shape
    n_windows = n_steps - lookback
    running_total = np.cumsum(data, axis=0)
    windows = np.zeros((n_windows, n_assets, lookback), dtype=np.float32)
    selected = np.zeros((n_windows, n_assets), dtype=bool)

    for row in range(n_windows):
        stop = row + lookback
        # Keep only the columns whose window contains no missing (zero) return.
        keep = np.all(data[row:stop, :] != 0, axis=0)
        selected[row, :] = keep

        block = running_total[row:stop, keep].T
        if row > 0:
            # Re-base on the total accumulated before the window so that the
            # window is the cumulative sum of its own returns only.
            block = block - running_total[row - 1, keep][:, None]
        windows[row, keep, :] = block

    return windows, torch.as_tensor(selected)
def preprocess_fourier(data, lookback):
    """
    Preprocess TxN numpy ndarray `data` into Fourier coefficients of rolling
    cumulative-sum windows of integer length `lookback`.

    Each window is built as the cumulative sum of the `lookback` rows it
    covers, then replaced by the real FFT of that window, packed into
    `lookback` real numbers:
      * positions `[0, lookback // 2]` hold the real parts of all
        `lookback // 2 + 1` rfft bins;
      * the remaining positions hold the imaginary parts of bins 1, 2, ...
        (the DC bin, and for even `lookback` the Nyquist bin, have zero
        imaginary part and are skipped).

    Works for both even and odd `lookback`.

    Returns a tuple `(windows, idxs_selected)`:
      * `windows`: float32 ndarray of shape (T - lookback, N, lookback);
        columns not selected for a window stay all-zero.
      * `idxs_selected`: boolean torch tensor of shape (T - lookback, N),
        True where the column has no exact-zero entry inside the window
        (a zero is treated as a missing return).
    """
    signal_length = lookback
    T, N = data.shape
    cumsums = np.cumsum(data, axis=0)
    windows = np.zeros((T - lookback, N, signal_length), dtype=np.float32)
    idxs_selected = np.zeros((T - lookback, N), dtype=bool)

    for t in range(lookback, T):
        start = t - lookback
        # chooses stocks which have no missing returns in the t-th lookback window
        idxs = ~np.any(data[start:t, :] == 0, axis=0).ravel()
        idxs_selected[start, :] = idxs

        window = cumsums[start:t, idxs].T
        if start > 0:
            # Re-base on the total accumulated before the window so every
            # window is the cumulative sum of its own returns only.
            window = window - cumsums[start - 1, idxs][:, None]
        windows[start, idxs, :] = window

    idxs_selected = torch.as_tensor(idxs_selected)

    # rfft returns a fresh array, so overwriting `windows` below is safe.
    spectrum = np.fft.rfft(windows, axis=-1)
    n_freqs = spectrum.shape[-1]  # == lookback // 2 + 1
    # Number of slots left for imaginary parts. For even lookback this drops
    # the Nyquist bin (purely real); for odd lookback there is no Nyquist bin
    # and every non-DC bin contributes its imaginary part.
    n_imag = lookback - n_freqs
    windows[:, :, :n_freqs] = np.real(spectrum)
    windows[:, :, n_freqs:] = np.imag(spectrum[:, :, 1:1 + n_imag])
    del spectrum
    return windows, idxs_selected
def preprocess_ou(data, lookback):
    """
    Preprocess TxN numpy ndarray `data` into per-window AR(1) / OU-style
    summary features.

    For each window of `lookback` rows, the cumulative sum `x` of the window
    is formed and the regression `x[k+1] = b * x[k] + c + residual` is fitted
    per column by ordinary least squares. Four features are stored per
    column, in this order along the last axis:
      0. the last cumulative value of the window,
      1. `mu = c / (1 - b + 1e-6)`, the fitted long-run level,
      2. `sigma = sqrt(var(residual) / |1 - b**2 + 1e-6|)`,
      3. `R2`, the squared correlation between `x[k]` and `x[k+1]`.

    NOTE(review): with `lookback` < 3 the lagged series has fewer than two
    points, so the variances are zero and the features become nan/inf.

    Returns a tuple `(windows, idxs_selected)`:
      * `windows`: float32 ndarray of shape (T - lookback, N, 4); columns
        with an exact-zero entry in the window (treated as a missing return)
        stay all-zero.
      * `idxs_selected`: boolean torch tensor of shape (T - lookback, N),
        True where the column has no zero in the window AND the fitted
        slope satisfies 0 < b < 1.
    """
    logging.info("Start OU preprocess")
    signal_length = 4
    T, N = data.shape
    cumsums = np.cumsum(data, axis=0)
    windows = np.zeros((T - lookback, N, signal_length), dtype=np.float32)
    idxs_selected = np.zeros((T - lookback, N), dtype=bool)

    for t in range(lookback, T):
        start = t - lookback
        # chooses stocks which have no missing returns in the t-th lookback window
        idxs = ~np.any(data[start:t, :] == 0, axis=0).ravel()

        x = cumsums[start:t, idxs].T  # (n_selected, lookback)
        if start > 0:
            # Re-base on the total accumulated before the window so every
            # window is the cumulative sum of its own returns only.
            x = x - cumsums[start - 1, idxs][:, None]

        Ys = x[:, 1:]   # (n_selected, lookback - 1): x[k+1]
        Xs = x[:, :-1]  # (n_selected, lookback - 1): x[k]
        meansX = np.mean(Xs, axis=1)
        meansY = np.mean(Ys, axis=1)
        VarsX = np.var(Xs, axis=1)
        VarsY = np.var(Ys, axis=1)
        Covs = np.mean((Xs - meansX[:, None]) * (Ys - meansY[:, None]), axis=1)

        R2 = Covs**2 / (VarsX * VarsY)
        bs = Covs / VarsX                   # OLS slope
        cs = meansY - bs * meansX           # OLS intercept
        mus = cs / (1 - bs + 0.000001)      # small epsilon guards b == 1
        # Only slopes strictly between 0 and 1 are kept as valid fits.
        mask = (bs > 0) & (bs < 1)
        residuals = Ys - bs[:, None] * Xs - cs[:, None]
        sigmas = np.sqrt(np.var(residuals, axis=1) / np.abs(1 - bs**2 + 0.000001))

        windows[start, idxs, 0] = Ys[:, -1]
        windows[start, idxs, 1] = mus
        windows[start, idxs, 2] = sigmas
        windows[start, idxs, 3] = R2
        # Columns outside `idxs` stay False from initialisation; the ones
        # inside are selected only if their fit passed the slope check.
        idxs_selected[start, idxs] = mask

    idxs_selected = torch.as_tensor(idxs_selected)
    logging.info("Finish OU preprocess")
    return windows, idxs_selected