-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyahoo_downloader.py
118 lines (98 loc) · 4.04 KB
/
yahoo_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
'''Data downloader for Yahoo Finance API 2017'''
import requests
import re
from datetime import datetime
import pandas as pd
import io
import logging
class Downloader:
    '''Download daily quotes, dividends and splits from Yahoo Finance.

    The download endpoint requires a session cookie plus a "crumb" token
    scraped from a regular quote page. Both are fetched lazily on the first
    download and refreshed whenever Yahoo answers with HTTP 401.

    Attributes:
        DATA_TYPES: event types requested by get_history, in request order.
        attempt_counter: number of requests made by the most recent download;
            reset to 0 once a download succeeds.
        years: number of years of data to request, counted back from today.
        ticker: symbol used to build the download URL.
    '''

    # Seconds to wait for a response before requests gives up on a call.
    REQUEST_TIMEOUT = 30
    # Upper bound on requests per download while the server keeps answering 401.
    MAX_AUTH_ATTEMPTS = 10
    # Captures the crumb token from the quote page source. The group is lazy so
    # that it stops at the first closing '"},"QuotePageStore"' marker.
    _CRUMB_PATTERN = re.compile(r'"CrumbStore":\{"crumb":"(.*?)"\},"QuotePageStore"')

    def __init__(self):
        self.DATA_TYPES = ['history', 'div', 'split']
        self._cookie = None
        self._crumb = None
        self.attempt_counter = 0
        self.years = 20
        self.ticker = None

    def settings(self):
        '''Return the currently set ticker and year range in a tuple'''
        return (self.ticker, self.years)

    @staticmethod
    def _years_before(moment, years):
        '''Return `moment` shifted back by `years` calendar years.

        29 February is mapped to 28 February when the target year is not a
        leap year. A ValueError still propagates if the target year itself is
        outside the range datetime supports.
        '''
        target_year = moment.year - years
        try:
            return moment.replace(year=target_year)
        except ValueError:
            # Either the day does not exist in the target year (29 February)
            # or the year is out of range; the latter raises again here.
            return moment.replace(year=target_year, day=28)

    def _get_crumb_and_cookies(self):
        '''Make an initial request to extract cookies and crumb to use in subsequent requests

        Raises:
            requests.HTTPError: if the quote page answers with an error status.
            Exception: if the status is unexpected or the page holds no crumb.
        '''
        url = 'https://finance.yahoo.com/quote/^GSPC'
        r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if r.status_code != requests.codes.ok:
            r.raise_for_status()
            # raise_for_status() is silent for non-error codes other than 200.
            raise Exception('Unexpected HTTP status {}'.format(r.status_code))
        search = self._CRUMB_PATTERN.search(r.text)
        if search is None:
            raise Exception('No crumb found in initial response')
        self._cookie = r.cookies
        self._crumb = search.group(1)

    def _get_single_data_type(self, data_type):
        '''Return a dataframe of the specified data type [history|div|split]

        The returned frame is indexed by the parsed 'Date' column of the CSV
        response. On HTTP 401 the crumb and cookie are renewed and the request
        is repeated, up to MAX_AUTH_ATTEMPTS requests in total.

        Raises:
            requests.HTTPError: on an error status other than 401.
            Exception: on an unexpected status, or 'Permanent Auth Error' when
                every attempt was answered with 401.
        '''
        now = datetime.today()
        period_start = int(self._years_before(now, self.years).timestamp())
        period_end = int(now.timestamp())
        url = 'https://query1.finance.yahoo.com/v7/finance/download/{}'.format(self.ticker)

        # The attempt count restarts on every call so that an earlier failed
        # download cannot eat into this download's retry budget.
        for attempt in range(1, self.MAX_AUTH_ATTEMPTS + 1):
            self.attempt_counter = attempt
            if self._cookie is None or self._crumb is None:
                self._get_crumb_and_cookies()
            params = {
                'period1': period_start,
                'period2': period_end,
                'events': data_type,
                'crumb': self._crumb,
                'interval': '1d',
            }
            r = requests.get(url, params=params, cookies=self._cookie,
                             timeout=self.REQUEST_TIMEOUT)
            if r.status_code == requests.codes.ok:
                df = pd.read_csv(io.BytesIO(r.content))
                df = df.set_index(pd.DatetimeIndex(df['Date'])).drop('Date', axis=1)
                self.attempt_counter = 0
                return df
            if r.status_code == 401:
                # In case of authorization error renew crumb and cookie and fetch data again.
                # See the issue here: https://github.com/c0redumb/yahoo_quote_download/issues/3
                self._crumb = None
                self._cookie = None
                if attempt < self.MAX_AUTH_ATTEMPTS:
                    logging.warning('Auth error, retrying...')
                continue
            r.raise_for_status()
            # raise_for_status() is silent for non-error codes other than 200.
            raise Exception('Unexpected HTTP status {}'.format(r.status_code))
        raise Exception('Permanent Auth Error')

    def _get_all_data_types(self):
        '''Return an iterator of all the three data types.

        Yields one item per entry of DATA_TYPES, in order: the downloaded
        frame, or None when that download raised (the error is logged).
        '''
        for data_type in self.DATA_TYPES:
            try:
                data = self._get_single_data_type(data_type=data_type)
            except Exception as e:
                logging.error(e)
                # Never hand out the previous data type's frame in place of
                # the one that failed.
                data = None
            yield data

    def _format_splits(self, value):
        '''Format splits to float

        Strings of the form 'a/b' or 'a:b' become the ratio a / b. Any value
        that is not a string (e.g. the filler 1 or a missing-value NaN) is
        returned unchanged.
        '''
        if not isinstance(value, str):
            return value
        numbers = re.split(r'[/:]', value.strip())
        return int(numbers[0]) / int(numbers[1])

    def get_history(self, ticker, years=20):
        '''Return quotes, dividends and splits in single Pandas DataFrame
        for the given ticker and specified number of years ending today (or the latest available).

        Rows without a dividend get 0 in 'Dividends'; rows without a split get
        1 in 'Stock Splits'. If the frames cannot be combined (for example
        because a download failed and its column is missing) the error is
        logged and an empty DataFrame is returned.
        '''
        self.ticker = ticker
        self.years = years
        frames = list(self._get_all_data_types())
        try:
            full_data = pd.concat(frames, axis=1)
            # Assign the filled columns back instead of filling a column
            # selection in place, which is chained assignment.
            full_data['Dividends'] = full_data['Dividends'].fillna(0)
            # Convert first: missing values pass through _format_splits
            # untouched and are then filled with the neutral ratio 1.
            splits = full_data['Stock Splits'].apply(self._format_splits)
            full_data['Stock Splits'] = splits.fillna(1)
            return full_data
        except Exception as e:
            logging.error(e)
            return pd.DataFrame()