-
Notifications
You must be signed in to change notification settings - Fork 0
/
S&P500_List.py
107 lines (88 loc) · 3.73 KB
/
S&P500_List.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Script that fetches the S&P 500 constituent list, downloads price data for
# each ticker, joins the closing prices and plots their correlation matrix.
import bs4 as bs
import numpy as np
import pickle #Used to save/load the ticker list as a Python object on disk
import os
import requests
import datetime as dt
import pandas as pd
# Version workaround: exposes pandas.api.types.is_list_like under
# pandas.core.common. NOTE(review): presumably needed by the installed
# pandas_datareader release, which is why it sits before that import -
# confirm before reordering or removing.
pd.core.common.is_list_like = pd.api.types.is_list_like #Version correction.
import pandas_datareader as web
import matplotlib.pyplot as plt
from matplotlib import style
# Select the 'ggplot' matplotlib style for every figure drawn by this script.
style.use('ggplot')
def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia and cache them on disk.

    The symbols are taken from the first cell of every data row of the first
    ``wikitable sortable`` table on the page. The resulting list is pickled
    to ``sp500tickers.pickle`` in the current working directory, printed,
    and returned.

    Returns:
        list: Ticker symbols with surrounding whitespace removed.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the expected table is not present in the page.
    """
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    soup = bs.BeautifulSoup(resp.text, "lxml")
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise ValueError("S&P 500 constituents table not found on the Wikipedia page")

    tickers = []
    for row in table.find_all('tr')[1:]:  # [1:] skips the header row
        cells = row.find_all('td')
        if not cells:
            # Rows made only of header cells carry no ticker.
            continue
        # Cell text may include whitespace/newlines from the HTML markup;
        # strip it so the symbol is usable as a lookup key and a file name.
        tickers.append(cells[0].text.strip())

    with open("sp500tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)
    print(tickers)
    return tickers
#Getting the companies data from tickers (SOURCE= Robinhood)
def get_data_from_robinhood(reload_sp500=False):
    """Download price data for every S&P 500 ticker into ``stock_dfs/``.

    Each ticker is fetched with ``pandas_datareader``'s ``'robinhood'`` reader
    for the fixed window 2017-08-15 to 2018-08-15 and written to
    ``stock_dfs/<ticker>.csv``. Tickers whose CSV already exists are skipped,
    so an interrupted run can be resumed.

    NOTE(review): the 'robinhood' data source may no longer be available in
    current pandas_datareader releases - confirm against the installed version.

    Args:
        reload_sp500: When true, re-scrape the ticker list via
            ``save_sp500_tickers()`` instead of using the pickled cache.
            The list is also scraped when no cache file exists yet, so a
            first run with the default argument does not fail.
    """
    pickle_path = "sp500tickers.pickle"
    if reload_sp500 or not os.path.exists(pickle_path):
        tickers = save_sp500_tickers()
    else:
        with open(pickle_path, "rb") as f:
            tickers = pickle.load(f)

    if not os.path.exists('stock_dfs'):
        os.makedirs('stock_dfs')

    start = dt.datetime(2017, 8, 15)
    end = dt.datetime(2018, 8, 15)
    for ticker in tickers:
        print(ticker)
        csv_path = 'stock_dfs/{}.csv'.format(ticker)
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'robinhood', start, end)
        df.to_csv(csv_path)
#get_data_from_robinhood()
#COMPILING ALL THE DATA IN SINGLE DATAFRAME
def compile_data():
    """Join the closing prices of all cached tickers into one CSV file.

    Reads the ticker list from ``sp500tickers.pickle`` and, for each ticker,
    loads ``stock_dfs/<ticker>.csv``, indexes it by ``begins_at`` and keeps
    only the ``close_price`` column, renamed to the ticker symbol. The
    per-ticker frames are outer-joined on the index and the result is written
    to ``sp500_joined_closes.csv``.
    """
    with open("sp500tickers.pickle", "rb") as f:
        tickers = pickle.load(f)

    # Every column except the closing price is discarded.
    unused_columns = ['symbol', 'session', 'high_price', 'interpolated',
                      'low_price', 'open_price', 'volume']
    main_df = pd.DataFrame()
    # Take each stock and arrange it in the joined frame by its closing price.
    for count, ticker in enumerate(tickers):
        df = pd.read_csv('stock_dfs/{}.csv'.format(ticker))
        df = df.set_index('begins_at').rename(columns={'close_price': ticker})
        # axis must be passed by keyword: pandas 2.0 no longer accepts it
        # as a positional argument to drop().
        df = df.drop(unused_columns, axis=1)
        if main_df.empty:
            main_df = df
        else:
            # Outer join keeps rows that are missing for some tickers.
            main_df = main_df.join(df, how='outer')
        if count % 10 == 0:
            print(count)  # progress indicator
    print(main_df.head())
    main_df.to_csv('sp500_joined_closes.csv')
#Creating the correlation table of the dataframe.
def visualize_data():
    """Show a heatmap of the correlations between the joined closing prices.

    Loads ``sp500_joined_closes.csv``, computes the column-by-column
    correlation table, prints its first rows and displays it as a
    red-yellow-green heatmap labelled with the column names on both axes.
    """
    # The first CSV column is the row index written by to_csv. Loading it as
    # the index keeps that non-price column out of the correlation, which
    # newer pandas versions refuse to compute on non-numeric data.
    df = pd.read_csv('sp500_joined_closes.csv', index_col=0)
    df_corr = df.corr()
    print(df_corr.head())

    data = df_corr.values
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)  # 1x1 grid, plot no. 1
    heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
    fig.colorbar(heatmap)

    # Put a tick at the centre of every cell: x runs over the columns
    # (shape[1]) and y over the rows (shape[0]).
    ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
    ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)
    # Table-like layout: first row at the top, column labels above the plot.
    ax.invert_yaxis()
    ax.xaxis.tick_top()

    ax.set_xticklabels(df_corr.columns)
    ax.set_yticklabels(df_corr.index)
    plt.xticks(rotation=90)
    heatmap.set_clim(-1, 1)  # correlation range; not needed for covariance
    plt.tight_layout()
    plt.show()
# Draw the heatmap only when this file is executed as a script, so that
# importing the module does not open a plot window as a side effect.
if __name__ == "__main__":
    visualize_data()