forked from dDevTech/Stock-Market-Predictor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAIFinance.py
191 lines (155 loc) · 6.56 KB
/
AIFinance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import numpy as np
import pandas as pd
import pickle
from sklearn import svm,model_selection,neighbors
from sklearn.ensemble import VotingClassifier,RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense, Dropout
from collections import Counter
import os
import matplotlib.pyplot as plt
def process_data_for_labels(ticker, hm_days=7, csv_path='sp500_closes.csv'):
    """Load the price table and add forward percent-change columns.

    For every horizon ``i`` in ``1..hm_days`` a column named
    ``f'{ticker}_{i}d'`` is added, holding
    ``(price[t + i] - price[t]) / price[t]`` for the ``ticker`` column.

    Args:
        ticker: Name of the CSV column whose future changes are computed.
        hm_days: Number of forward horizons to generate. Defaults to 7.
        csv_path: CSV file to read; its first column is used as the index.

    Returns:
        A ``(tickers, df)`` tuple. ``tickers`` is the list of column names
        read from the CSV (captured before the new columns are added) and
        ``df`` is the frame including the new horizon columns.
    """
    df = pd.read_csv(csv_path, index_col=0)
    tickers = df.columns.values.tolist()

    # Missing prices are treated as 0 before the ratios are computed.
    df.fillna(0, inplace=True)

    base = df[ticker]
    for i in range(1, hm_days + 1):
        # shift(-i) aligns the value i rows ahead with the current row.
        df[f'{ticker}_{i}d'] = (base.shift(-i) - base) / base

    # The last rows have no "future" value (NaN after the shift) and 0/0 is
    # also NaN; both become 0 here. A non-zero change divided by a zero
    # price is +/-inf, which fillna does not touch.
    df.fillna(0, inplace=True)
    return tickers, df
def buy_sell_hold(*args, requirement=0.25):
    """Map a sequence of fractional changes to a buy/sell/hold label.

    The values are scanned in order and the first one that crosses the
    threshold decides the label.

    Args:
        *args: Fractional changes (e.g. ``0.3`` for +30%), checked in order.
        requirement: Keyword-only threshold; a change must be strictly
            greater than it (buy) or strictly less than its negative (sell).

    Returns:
        ``1`` if the first value beyond the threshold is positive, ``-1`` if
        it is negative, and ``0`` when no value crosses the threshold
        (including when no values are given).
    """
    for change in args:
        if change > requirement:
            return 1
        if change < -requirement:
            return -1
    return 0
def extract_feature_sets(ticker):
    """Build the feature matrix and label vector for one ticker.

    Labels come from ``buy_sell_hold`` applied row by row to the seven
    forward-change columns ``{ticker}_1d`` .. ``{ticker}_7d``. Features are
    the row-to-row percent changes of every original price column.

    Args:
        ticker: Column name of the instrument to label.

    Returns:
        ``(X, y, df)``: ``X`` is the 2-D array of percent changes, ``y`` the
        array of labels from the ``{ticker}_target`` column, and ``df`` the
        cleaned frame both were taken from.
    """
    tickers, df = process_data_for_labels(ticker)

    target_col = f'{ticker}_target'
    horizon_cols = [df[f'{ticker}_{day}d'] for day in range(1, 8)]
    # Walk the seven horizon columns in lockstep, one row at a time.
    df[target_col] = [buy_sell_hold(*changes) for changes in zip(*horizon_cols)]

    labels_as_text = list(map(str, df[target_col].values.tolist()))
    print('Data spread', Counter(labels_as_text))

    # Rows containing +/-inf are discarded: inf -> NaN -> dropna.
    df.fillna(0, inplace=True)
    df = df.replace([np.inf, -np.inf], np.nan)
    df.dropna(inplace=True)

    # For the feature matrix, inf and NaN changes are zeroed instead.
    price_changes = df[tickers].pct_change()
    price_changes = price_changes.replace([np.inf, -np.inf], 0)
    price_changes.fillna(0, inplace=True)

    X = price_changes.values
    y = df[target_col].values
    return X, y, df
def do_ml(ticker):
    """Fit a voting ensemble on one ticker's features and report accuracy.

    The data from ``extract_feature_sets`` is split 75/25 into train and
    test parts; a ``VotingClassifier`` combining a linear SVC, a k-nearest
    neighbours classifier and a random forest is fitted on the train part.

    Args:
        ticker: Column name of the instrument to model.

    Returns:
        The ensemble's score on the held-out test part. The score and the
        distribution of predicted labels are also printed.
    """
    features, labels, _ = extract_feature_sets(ticker)
    splits = model_selection.train_test_split(features, labels, test_size=0.25)
    X_train, X_test, y_train, y_test = splits

    estimators = [
        ('lsvc', svm.LinearSVC()),
        ('knn', neighbors.KNeighborsClassifier()),
        ('rfor', RandomForestClassifier()),
    ]
    ensemble = VotingClassifier(estimators)
    ensemble.fit(X_train, y_train)

    confidence = ensemble.score(X_test, y_test)
    print('Accuracy', confidence)
    print('Predicted spread: ', Counter(ensemble.predict(X_test)))
    return confidence
def create_dataset(ticker, daysPrediction=50, splitProportion=0.8):
    """Build scaled, windowed train/test arrays for one ticker.

    The ``ticker`` column of ``sp500_closes.csv`` is split chronologically,
    scaled to [0, 1] with a scaler fitted on the training part only, and cut
    into sliding windows of ``daysPrediction`` values, each paired with the
    value that follows it.

    Args:
        ticker: Column name in the CSV to model.
        daysPrediction: Window length, i.e. how many past values form one
            input sample.
        splitProportion: Fraction of rows assigned to the training part.

    Returns:
        ``(x_test, y_test, x_train, y_train, df, ticker, scaler, values)``.
        The ``x_*`` arrays have shape ``(samples, daysPrediction, 1)``,
        ``values`` is the unscaled column as an ``(n, 1)`` array and
        ``scaler`` is the fitted ``MinMaxScaler``.

    Raises:
        ValueError: If the split leaves no complete training window or no
            test sample.
    """
    # Read data
    df = pd.read_csv('sp500_closes.csv')
    values = df[ticker].values.reshape(-1, 1)

    split_at = int(values.shape[0] * splitProportion)
    # Without this guard a too-short training part makes the test slice
    # start negative (wrapping around) and the later reshape fails with an
    # opaque IndexError.
    if split_at <= daysPrediction:
        raise ValueError(
            f'training part has {split_at} rows; more than '
            f'daysPrediction={daysPrediction} are required'
        )
    if split_at >= values.shape[0]:
        raise ValueError('splitProportion leaves no rows for the test part')

    # The test part starts daysPrediction rows before the split so that its
    # first window ends exactly where the training data ends.
    dataset_train = np.array(values[:split_at])
    dataset_test = np.array(values[split_at - daysPrediction:])

    # Normalize to [0, 1]; the scaler is fitted on the training part only
    # and then applied to the test part.
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset_train = scaler.fit_transform(dataset_train)
    dataset_test = scaler.transform(dataset_test)

    x_train, y_train = create_d_dataset(dataset_train, daysPrediction)
    x_test, y_test = create_d_dataset(dataset_test, daysPrediction)

    # Add the trailing feature axis: (samples, window, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    return x_test, y_test, x_train, y_train, df, ticker, scaler, values
def create_d_dataset(val, daysPrediction):
    """Cut the first column of ``val`` into sliding windows and targets.

    Sample ``k`` consists of ``daysPrediction`` consecutive values; its
    target is the value immediately after that window.

    Args:
        val: 2-D array; only column 0 is used.
        daysPrediction: Window length.

    Returns:
        ``(x, y)`` where ``x`` has shape ``(samples, daysPrediction)`` and
        ``y`` has shape ``(samples,)``. When ``val`` has too few rows for a
        single window, ``x`` is an empty array of shape
        ``(0, daysPrediction)`` so callers can still read ``x.shape[1]``.
    """
    series = np.asarray(val)[:, 0]
    target_rows = range(daysPrediction, series.shape[0])

    if len(target_rows) == 0:
        # Keep the result 2-D even when there is nothing to window.
        return (
            np.empty((0, daysPrediction), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    x = np.array([series[i - daysPrediction:i] for i in target_rows])
    y = np.array([series[i] for i in target_rows])
    return x, y
def train(train_x, train_y, ticker, epochs=3, batch_size=32):
    """Train the stacked-LSTM model for ``ticker`` unless one is saved.

    The model is stored as ``stock_prediction-{ticker}.h5`` in the current
    directory. If that file already exists nothing is built or trained; the
    existing file is kept regardless of the data passed in.

    Args:
        train_x: Input windows; ``train_x.shape[1]`` is used as the sequence
            length and the feature dimension is fixed at 1.
        train_y: Target value for each window.
        ticker: Used only to build the model file name.
        epochs: Number of training epochs. Defaults to 3.
        batch_size: Mini-batch size. Defaults to 32.

    Returns:
        None.
    """
    model_path = f'stock_prediction-{ticker}.h5'
    # Check first: building and compiling the network is wasted work when a
    # saved model is going to be reused.
    if os.path.exists(model_path):
        return

    window = train_x.shape[1]

    # Four LSTM layers of 96 units, each followed by 20% dropout. All but
    # the last LSTM return full sequences so the next LSTM can consume them.
    model = Sequential()
    model.add(LSTM(units=96, return_sequences=True, input_shape=(window, 1)))
    model.add(Dropout(0.2))
    for _ in range(2):
        model.add(LSTM(units=96, return_sequences=True))
        model.add(Dropout(0.2))
    model.add(LSTM(units=96))
    model.add(Dropout(0.2))
    # Single output unit: one predicted value per window.
    model.add(Dense(units=1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size)
    model.save(model_path)
def show_results(test_x, test_y, train_y, ticker, scaler, values, df):
    """Evaluate the saved model on the test windows and plot the result.

    Loads ``stock_prediction-{ticker}.h5``, prints the evaluation result and
    some diagnostic values, then creates two figures: predictions against
    the true test targets, and predictions overlaid on the full ``ticker``
    column of ``df``.

    Args:
        test_x: Test windows, shape ``(samples, window, 1)``.
        test_y: Scaled target value for each test window.
        train_y: Training targets; only their count is used, to position
            the predictions on the full-series plot.
        ticker: Column of ``df`` to plot and part of the model file name.
        scaler: Fitted scaler used to map values back to the original
            scale via ``inverse_transform``.
        values: Unscaled series; only printed.
        df: Frame containing the ``ticker`` column.

    Returns:
        None. The figures are created but not explicitly shown or saved.
    """
    model = load_model(f'stock_prediction-{ticker}.h5')

    predictions = model.predict(test_x)
    print(model.evaluate(test_x, test_y))
    print(test_x)
    predictions = scaler.inverse_transform(predictions)
    print(values)

    # Figure 1: predictions vs. true values over the test range.
    _, ax = plt.subplots(figsize=(8, 4))
    print(len(train_y))
    y_test_scaled = scaler.inverse_transform(test_y.reshape(-1, 1))
    ax.plot(y_test_scaled, color=(0, 0, 0.7, 0.5), label="True Price")
    ax.plot(predictions, color=(1, 0, 0, 0.5),
            label='Predicted Testing Price')
    ax.legend()

    # Figure 2: predictions placed on the full series. The first prediction
    # belongs at index len(train_y) + window length; the window length is
    # read from test_x instead of being hard-coded to 50, so non-default
    # window sizes are positioned correctly too.
    window = test_x.shape[1]
    first = len(train_y) + window
    _, ax2 = plt.subplots(figsize=(8, 4))
    ax2.plot(df[ticker].values, color='red', label="True Price")
    ax2.plot(range(first, first + len(predictions)), predictions,
             color='blue', label='Predicted Testing Price')
    ax2.legend()
def predictFuture(ticker):
    """Run the full pipeline for one ticker: dataset, training, plots.

    Args:
        ticker: Column name in the price CSV to model.

    Returns:
        None.
    """
    # Build the windowed dataset with create_dataset's default settings.
    dataset = create_dataset(ticker)
    x_test, y_test, x_train, y_train, df, ticker, scaler, values = dataset

    # Train the network (skipped by train() when a saved model exists).
    train(x_train, y_train, ticker=ticker)

    # Evaluate on the test windows and draw the figures.
    show_results(x_test, y_test, y_train, ticker, scaler, values, df)
def predictNextYear(ticker, daysPrediction=50):
    """Train the model for ``ticker`` on the entire series (no test split).

    As written, this function only prepares the data and calls ``train``;
    it does not produce or return any forecast.

    Args:
        ticker: Column name in ``sp500_closes.csv`` to model.
        daysPrediction: Window length used to build the training samples.
            Defaults to 50.

    Returns:
        None.
    """
    # Read data
    df = pd.read_csv('sp500_closes.csv')
    values = df[ticker].values.reshape(-1, 1)

    # Normalize the whole series to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset_train = scaler.fit_transform(values)

    # Sliding windows and their next-value targets.
    x_train, y_train = create_d_dataset(dataset_train, daysPrediction)

    # Add the trailing feature axis expected by the network.
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    train(x_train, y_train, ticker)
# Runs at module load (also on import): builds the AMZN dataset, trains the
# model unless a saved one exists, and plots the results.
predictFuture('AMZN')