-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAuto_Regression.py
106 lines (92 loc) · 4.08 KB
/
Auto_Regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from dotenv import load_dotenv
from statsmodels.tsa.ar_model import AutoReg
# This is imported to check the stationarity of the data
# This is a Stationarity-Test Function
from statsmodels.tsa.stattools import adfuller
# importing acf and pacf plots
from statsmodels.graphics.tsaplots import plot_pacf,plot_acf
# Load variables from a local .env file (if present) into the environment.
load_dotenv()
# Path of the CSV file read by main(); os.getenv returns None when
# FILE_ADDRESS is not set.
address = os.getenv("FILE_ADDRESS")
# Name of the DataFrame column that main() plots, tests and models.
close = "Close"
# Number of lags passed to the AutoReg model in main().
lag = 2
# Number of lags passed to the ACF and PACF plots in main().
LAG = 12
def Accuracy(predictions, test) -> None:
    """Print how many predictions fall within 10% of the observed values.

    The two inputs are compared position by position. A prediction is a
    "hit" when its absolute error is strictly less than 10% of the absolute
    observed value, otherwise it is a "miss". The counts and their
    percentages are printed; nothing is returned.

    Args:
        predictions: Predicted values (pandas Series or any 1-D sequence).
        test: Observed values in the same order as ``predictions``.

    Notes:
        * Only the overlapping prefix ``min(len(predictions), len(test))`` is
          compared, so inputs of different lengths no longer raise
          ``IndexError`` and equal-length inputs no longer lose their last
          pair.
        * The relative error is taken against ``abs(test)``, so negative
          observed values are not automatically counted as hits.
        * An observed value of zero (or NaN anywhere) counts as a miss
          because the relative error is undefined.
        * With nothing to compare, both ratios are reported as 0.0 instead
          of raising ``ZeroDivisionError``.
    """
    predicted = np.asarray(predictions, dtype=float)
    actual = np.asarray(test, dtype=float)
    compared = min(len(predicted), len(actual))

    if compared == 0:
        hits = 0
        miss = 0
        hit_ratio = 0.0
        miss_ratio = 0.0
    else:
        predicted = predicted[:compared]
        actual = actual[:compared]
        # Division by a zero observation yields inf/nan; both compare False
        # against the threshold and are therefore counted as misses.
        with np.errstate(divide="ignore", invalid="ignore"):
            relative_error = np.abs(actual - predicted) / np.abs(actual)
        hits = int(np.count_nonzero(relative_error < 0.10))
        miss = compared - hits
        hit_ratio = hits / compared * 100
        miss_ratio = miss / compared * 100

    print(f"The Number of Hits are : {hits}\nThe Number of Misses are : {miss}\nThe HIT-RATIO is : {hit_ratio}\nThe MISS-RATIO is : {miss_ratio}\n\n")
def deviations(predictions,test)->[float]:
    """Return the signed error ``test - predictions`` for each test position.

    Both arguments are indexed positionally with ``.iloc``; one difference
    is produced for every position of ``test``, in order.
    """
    return [
        test.iloc[position] - predictions.iloc[position]
        for position in range(len(test))
    ]
def main():
    """Fit an AutoReg model on the closing prices and report its forecasts.

    Steps:
        1. Load the CSV named by ``address`` and plot the ``close`` column.
        2. Run the Augmented Dickey-Fuller stationarity test and print it.
        3. Show the ACF and PACF plots for ``LAG`` lags.
        4. Split the series 70% / 30% into contiguous train and test parts.
        5. Fit ``AutoReg`` with ``lag`` lags on the training part and
           forecast exactly the test period.
        6. Plot forecasts against the test data and print the timing,
           the deviations and the hit/miss accuracy.

    Raises:
        ValueError: If ``address`` is empty or unset (FILE_ADDRESS missing).
    """
    if not address:
        # pd.read_csv(None) fails with an opaque message; say what is wrong.
        raise ValueError("FILE_ADDRESS is not set; it must point to the input CSV file.")

    df = pd.read_csv(address)
    print(df)
    series = df[close]
    plt.plot(series, marker="o", color="green", label="Closing")
    plt.show()

    # Augmented Dickey-Fuller test: the null hypothesis is that the series
    # has a unit root (is non-stationary). With autolag="AIC" the number of
    # lags is chosen to minimise the Akaike information criterion.
    # A p-value below the chosen significance level (commonly 0.05) rejects
    # the null hypothesis, i.e. the series can be treated as stationary;
    # a larger p-value means stationarity cannot be concluded.
    StationaryTest = adfuller(series, autolag="AIC")
    # adfuller returns (test statistic, p-value, lags used, observations
    # used, dict of critical values, ...).
    print(f"\n\nADF Test Value : {StationaryTest[0]}\n")
    print(f"P-Value : {StationaryTest[1]}\n")
    print(f"Number of Lags : {StationaryTest[2]}\n")
    print(f"Number of Observations Used for ADF Test : {StationaryTest[3]}\n")
    print("Critical Values :")
    for key, value in StationaryTest[4].items():
        print(f"\t{key} : {value}")

    # ACF shows the correlation of the series with its own lags; PACF shows
    # the direct effect of each lag once shorter lags are accounted for and
    # is used to decide how many lags the AR model should include.
    plot_acf(series, lags=LAG)
    plot_pacf(series, lags=LAG)
    plt.show()

    # Contiguous 70/30 split: the test set starts exactly where the training
    # set ends, so the forecasts below line up with the test observations.
    split_at = int(0.70 * len(df))
    train = series.iloc[:split_at]
    test = series.iloc[split_at:]
    print("\nThe Training Set is : \n\n")
    print(train)
    print("\nThe Testing Data Set is :\n")
    print(test)

    # Fit the auto-regression model and time training plus prediction.
    st1 = time.time()
    model = AutoReg(train, lags=lag).fit()
    print("\n\nModel Performance Summary \n\n")
    print(f"\n{model.summary()}\n")
    # `end` is inclusive, so len(df) - 1 yields one forecast per test row.
    predictions = model.predict(start=len(train), end=len(df) - 1, dynamic=False)
    end1 = time.time()
    print("\nThe Prediction Values are : \n\n")
    print(predictions)

    # Plot the forecasts together with the held-out test data.
    plt.plot(predictions, marker="o", color="green", label="PREDICTIONS")
    plt.plot(test, marker="x", color="blue", label="TESTING")
    plt.legend()
    plt.show()

    print(f"\nThe Time Taken by the Model Training and Prediction is : {end1-st1:.6f}\n")
    print("The Deviations are : \n\n")
    deviation = deviations(predictions, test)
    for error in deviation:
        print(error)
    print("\n\nThe Accuracy of the Model is : \n\n")
    Accuracy(predictions, test)
# Run the whole analysis as soon as this module is loaded; there is no
# __main__ guard, so importing the file also triggers it.
main()