Skip to content

Commit 57a73fc

Browse files
authored
Merge pull request #248 from dpguthrie/add-consent-handling
Add consent handling
2 parents ff568c8 + 399284b commit 57a73fc

File tree

8 files changed

+127
-62
lines changed

8 files changed

+127
-62
lines changed

CHANGELOG.rst

+20-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,23 @@
11
Change Log
22
==========
33

4+
2.3.7
5+
-----
6+
## Add
7+
- Logic for handling setting up a session when a consent screen is encountered. This is primarily seen in European countries
8+
and should allow for the continued use of this package.
9+
- Keyword argument, `setup_url`, to the base `_YahooFinance` class that allows a user to override the URL used in setting up the session. By default
10+
the Yahoo Finance home page is used (https://finance.yahoo.com). You can also create an environment variable, `YF_SETUP_URL`, that will be used if set.
11+
Example usage:
12+
```python
13+
import yahooquery as yq
14+
15+
t = yq.Ticker('aapl', setup_url='https://finance.yahoo.com/quote/AAPL')
16+
```
17+
18+
## Remove
19+
- Webdriver manager is no longer used internally. Selenium Manager is now fully included with selenium `4.10.0`, so the `webdriver-manager` package is no longer needed.
20+
421
2.3.6
522
-----
623
## Fix
@@ -110,7 +127,7 @@ Change Log
110127
to adjust the timezone (:code:`adj_timezone`) to the ticker's timezone. It defaults
111128
to :code:`True`.
112129
- Further documentation of acceptable keyword arguments to the :code:`Ticker` class.
113-
- :code:`Ticker.news` is now a method. It accepts two arguments: :code:`count` -
130+
- :code:`Ticker.news` is now a method. It accepts two arguments: :code:`count` -
114131
number of items to return; :code:`start` - start date to begin retrieving news items from
115132
- Bug fixes: :code:`Ticker.history` method no longer returns extra rows when retrieving
116133
intraday data.
@@ -131,12 +148,12 @@ Change Log
131148
:code:`p_valuation_measures` and supply either :code:`a`, :code:`q`, or
132149
:code:`m` (annual, quarterly, monthly). The data returned with these can
133150
be seen in the `Statistics` tab through the Yahoo Finance front-end.
134-
151+
135152
.. image:: demo/valuation_measures.PNG
136153

137154
2.2.2
138155
-----
139-
- Fix bug in retrieving cash flow / income statement data. Most recent month was
156+
- Fix bug in retrieving cash flow / income statement data. Most recent month was
140157
combining with TTM. A new column was created in the dataframe called 'periodType'.
141158
Annual data will be shown as '12M', quarterly data will be shown as '3M', and
142159
trailing 12 month data will be shown as 'TTM'.

poetry.lock

+31-32
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "yahooquery"
3-
version = "2.3.6"
3+
version = "2.3.7"
44
description = "Python wrapper for an unofficial Yahoo Finance API"
55
authors = ["Doug Guthrie <[email protected]>"]
66
documentation = "https://yahooquery.dpguthrie.com"
@@ -16,7 +16,7 @@ requests-futures = "^1.0.1"
1616
tqdm = "^4.65.0"
1717
lxml = "^4.9.3"
1818
selenium = {version = "^4.10.0", optional = true}
19-
webdriver-manager = {version = "^3.8.6", optional = true}
19+
beautifulsoup4 = "^4.12.2"
2020

2121
[tool.poetry.dev-dependencies]
2222
pytest = "^7.4.0"
@@ -34,4 +34,4 @@ requires = ["poetry-core>=1.0.0"]
3434
build-backend = "poetry.core.masonry.api"
3535

3636
[tool.poetry.extras]
37-
premium = ["selenium", "webdriver-manager"]
37+
premium = ["selenium"]

yahooquery/__init__.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
"""Python interface to unofficial Yahoo Finance API endpoints"""
22

33
name = "yahooquery"
4-
__version__ = "2.3.6"
4+
__version__ = "2.3.7"
55

6-
from .research import Research # noqa
7-
from .ticker import Ticker # noqa
8-
from .screener import Screener # noqa
96
from .misc import ( # noqa
107
get_currencies,
118
get_exchanges,
129
get_market_summary,
1310
get_trending,
1411
search,
1512
)
13+
from .research import Research # noqa
14+
from .screener import Screener # noqa
15+
from .ticker import Ticker # noqa

yahooquery/base.py

+23-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# stdlib
2+
import logging
23
import os
34
import time
45
from concurrent.futures import as_completed
@@ -26,6 +27,9 @@
2627
import urlparse as parse
2728

2829

30+
logger = logging.getLogger(__name__)
31+
32+
2933
class _YahooFinance(object):
3034
CHUNK = 1500
3135

@@ -938,11 +942,12 @@ def __init__(self, **kwargs):
938942
self.progress = kwargs.pop("progress", False)
939943
self.username = kwargs.pop("username", os.getenv("YF_USERNAME", None))
940944
self.password = kwargs.pop("password", os.getenv("YF_PASSWORD", None))
945+
self._setup_url = kwargs.pop("setup_url", os.getenv("YF_SETUP_URL", None))
941946
self.session = initialize_session(kwargs.pop("session", None), **kwargs)
942947
if self.username and self.password:
943948
self.login()
944949
else:
945-
self.session = setup_session(self.session)
950+
self.session = setup_session(self.session, self._setup_url)
946951
self.crumb = get_crumb(self.session)
947952

948953
@property
@@ -991,13 +996,27 @@ def default_query_params(self):
991996
params["crumb"] = self.crumb
992997
return params
993998

994-
def login(self):
999+
def login(self) -> None:
9951000
if _has_selenium:
9961001
instance = YahooFinanceHeadless(self.username, self.password)
9971002
instance.login()
998-
self.session.cookies = instance.cookies
1003+
if instance.cookies:
1004+
self.session.cookies = instance.cookies
1005+
return
9991006

1000-
return []
1007+
else:
1008+
logger.warning(
1009+
"Unable to login and/or retrieve the appropriate cookies. This is "
1010+
"most likely due to Yahoo Finance instituting recaptcha, which "
1011+
"this package does not support."
1012+
)
1013+
1014+
else:
1015+
logger.warning(
1016+
"You do not have the required libraries to use this feature. Install "
1017+
"with the following: `pip install yahooquery[premium]`"
1018+
)
1019+
self.session = setup_session(self.session, self._setup_url)
10011020

10021021
def _chunk_symbols(self, key, params={}, chunk=None, **kwargs):
10031022
current_symbols = self.symbols

yahooquery/headless.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,11 @@
77
try:
88
# third party
99
from selenium import webdriver
10-
from selenium.common.exceptions import NoSuchElementException, TimeoutException
11-
from selenium.webdriver.chrome.options import Options
12-
from selenium.webdriver.chrome.service import Service as ChromeService
10+
from selenium.common.exceptions import TimeoutException
11+
from selenium.webdriver.chrome.service import Service
1312
from selenium.webdriver.common.by import By
1413
from selenium.webdriver.support import expected_conditions as EC
1514
from selenium.webdriver.support.ui import WebDriverWait
16-
from webdriver_manager.chrome import ChromeDriverManager
1715
except ImportError:
1816
# Selenium was not installed
1917
_has_selenium = False
@@ -28,16 +26,14 @@ def __init__(self, username: str, password: str):
2826
self.username = username
2927
self.password = password
3028
self.cookies = RequestsCookieJar()
31-
chrome_options = Options()
29+
chrome_options = webdriver.ChromeOptions()
3230
chrome_options.add_argument("--headless")
3331
chrome_options.add_argument("--no-sandbox")
3432
chrome_options.add_argument("--log-level=3")
3533
chrome_options.add_argument("--ignore-certificate-errors")
3634
chrome_options.add_argument("--ignore-ssl-errors")
37-
self.driver = webdriver.Chrome(
38-
service=ChromeService(ChromeDriverManager().install()),
39-
options=chrome_options,
40-
)
35+
service = Service()
36+
self.driver = webdriver.Chrome(service=service, options=chrome_options)
4137

4238
def login(self):
4339
try:

yahooquery/misc.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# stdlib
2+
import os
3+
14
# third party
25
import pandas as pd
36

@@ -20,8 +23,9 @@ def _make_request(
2023
country, ", ".join(sorted(COUNTRIES.keys()))
2124
)
2225
)
26+
setup_url = kwargs.pop("setup_url", os.getenv("YF_SETUP_URL", None))
2327
session = initialize_session(**kwargs)
24-
session = setup_session(session)
28+
session = setup_session(session, setup_url)
2529
crumb = get_crumb(session)
2630
if crumb is not None:
2731
params["crumb"] = crumb

yahooquery/utils/__init__.py

+36-6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# third party
88
import pandas as pd
99
import requests
10+
from bs4 import BeautifulSoup
1011
from requests.adapters import HTTPAdapter
1112
from requests.exceptions import ConnectionError, RetryError, SSLError
1213
from requests.packages.urllib3.util.retry import Retry
@@ -17,7 +18,7 @@
1718

1819

1920
DEFAULT_TIMEOUT = 5
20-
21+
DEFAULT_SESSION_URL = "https://finance.yahoo.com"
2122
CRUMB_FAILURE = (
2223
"Failed to obtain crumb. Ability to retrieve data will be significantly limited."
2324
)
@@ -1366,8 +1367,8 @@ def initialize_session(session=None, **kwargs):
13661367
return session
13671368

13681369

1369-
def setup_session(session: requests.Session):
1370-
url = "https://finance.yahoo.com"
1370+
def setup_session(session: requests.Session, url: str = None):
1371+
url = url or DEFAULT_SESSION_URL
13711372
try:
13721373
response = session.get(url, allow_redirects=True)
13731374
except SSLError:
@@ -1380,10 +1381,39 @@ def setup_session(session: requests.Session):
13801381
except SSLError:
13811382
counter += 1
13821383

1383-
if not isinstance(session, FuturesSession):
1384-
return session
1384+
if isinstance(session, FuturesSession):
1385+
response = response.result()
1386+
1387+
# check for and handle consent page
1388+
if response.url.find("consent") >= 0:
1389+
logger.debug(f'Redirected to consent page: "{response.url}"')
1390+
1391+
soup = BeautifulSoup(response.content, "html.parser")
1392+
1393+
params = {}
1394+
for param in ["csrfToken", "sessionId"]:
1395+
try:
1396+
params[param] = soup.find("input", attrs={"name": param})["value"]
1397+
except Exception as exc:
1398+
logger.critical(
1399+
f'Failed to find or extract "{param}" from response. Exception={exc}'
1400+
)
1401+
return session
1402+
1403+
logger.debug(f"params: {params}")
1404+
1405+
response = session.post(
1406+
"https://consent.yahoo.com/v2/collectConsent",
1407+
data={
1408+
"agree": ["agree", "agree"],
1409+
"consentUUID": "default",
1410+
"sessionId": params["sessionId"],
1411+
"csrfToken": params["csrfToken"],
1412+
"originalDoneUrl": url,
1413+
"namespace": "yahoo",
1414+
},
1415+
)
13851416

1386-
_ = response.result()
13871417
return session
13881418

13891419

0 commit comments

Comments
 (0)