From 3e35ae766d02c45aa10ca9f74222d5c309a43b6d Mon Sep 17 00:00:00 2001 From: Mike Reiche Date: Sat, 3 Aug 2024 16:58:08 +0200 Subject: [PATCH 1/4] Improve reading timezone from earning dates and only fall back to general symbol timezone when decoding fails --- requirements.txt | 2 ++ tests/test_ticker.py | 25 ++++++++++++++++++------- yfinance/base.py | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/requirements.txt b/requirements.txt index f19ca36b1..fe2357376 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,5 @@ frozendict>=2.3.4 beautifulsoup4>=4.11.1 html5lib>=1.1 peewee>=3.16.2 +requests-cache==1.2.1 +requests-ratelimiter==0.7.0 diff --git a/tests/test_ticker.py b/tests/test_ticker.py index 84ed9f731..8c1e7c6d9 100644 --- a/tests/test_ticker.py +++ b/tests/test_ticker.py @@ -8,18 +8,18 @@ python -m unittest tests.ticker.TestTicker """ -import pandas as pd - -from .context import yfinance as yf -from .context import session_gbl -from yfinance.exceptions import YFChartError, YFInvalidPeriodError, YFNotImplementedError, YFTickerMissingError, YFTzMissingError - import unittest -import requests_cache from typing import Union, Any, get_args, _GenericAlias from urllib.parse import urlparse, parse_qs, urlencode, urlunparse +import pandas as pd +import requests_cache + +from yfinance.exceptions import YFChartError, YFInvalidPeriodError, YFNotImplementedError, YFTickerMissingError, YFTzMissingError +from .context import session_gbl +from .context import yfinance as yf + ticker_attributes = ( ("major_holders", pd.DataFrame), ("institutional_holders", pd.DataFrame), @@ -289,6 +289,7 @@ def test_earnings_dates(self): data = self.ticker.earnings_dates self.assertIsInstance(data, pd.DataFrame, "data has wrong type") self.assertFalse(data.empty, "data is empty") + self.assertEqual(data.index.tz.zone, "America/New_York") def test_earnings_dates_with_limit(self): # use ticker with lots of historic 
earnings @@ -298,6 +299,7 @@ def test_earnings_dates_with_limit(self): self.assertIsInstance(data, pd.DataFrame, "data has wrong type") self.assertFalse(data.empty, "data is empty") self.assertEqual(len(data), limit, "Wrong number or rows") + self.assertEqual(data.index[0].tz.zone, "America/New_York") data_cached = ticker.get_earnings_dates(limit=limit) self.assertIs(data, data_cached, "data not cached") @@ -323,6 +325,15 @@ def test_earnings_dates_with_limit(self): # data_cached = self.ticker.earnings_trend # self.assertIs(data, data_cached, "data not cached") + def test_ticker_has_tz(self): + test_data = {"AMZN": "America/New_York", "LHA.DE": "Europe/Berlin", "6758.T": "Asia/Tokyo"} + for symbol, tz in test_data.items(): + with self.subTest(f"{symbol}-{tz}"): + ticker = yf.Ticker(symbol) + data = ticker.get_earnings_dates(limit=1) + self.assertIsNotNone(data.index.tz) + self.assertEqual(data.index.tz.zone, tz) + class TestTickerHolders(unittest.TestCase): session = None diff --git a/yfinance/base.py b/yfinance/base.py index bf258a3f8..ec8847803 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -21,25 +21,26 @@ from __future__ import print_function -from io import StringIO import json as _json +import re import warnings +from io import StringIO from typing import Optional, Union from urllib.parse import quote as urlencode import pandas as pd +import pytz import requests from . 
import utils, cache +from .const import _BASE_URL_, _ROOT_URL_ from .data import YfData from .exceptions import YFEarningsDateMissing from .scrapers.analysis import Analysis from .scrapers.fundamentals import Fundamentals +from .scrapers.history import PriceHistory from .scrapers.holders import Holders from .scrapers.quote import Quote, FastInfo -from .scrapers.history import PriceHistory - -from .const import _BASE_URL_, _ROOT_URL_ class TickerBase: @@ -534,6 +535,15 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: logger = utils.get_yf_logger() + ticker_tz = "" + + def get_ticker_tz(): + nonlocal ticker_tz + if ticker_tz == "": + self._quote.proxy = proxy or self.proxy + ticker_tz = self._get_ticker_tz(proxy=proxy, timeout=30) + return ticker_tz + page_size = min(limit, 100) # YF caps at 100, don't go higher page_offset = 0 dates = None @@ -589,20 +599,20 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: # Parse earnings date string cn = "Earnings Date" - # - remove AM/PM and timezone from date string - tzinfo = dates[cn].str.extract('([AP]M[a-zA-Z]*)$') - dates[cn] = dates[cn].replace(' [AP]M[a-zA-Z]*$', '', regex=True) - # - split AM/PM from timezone - tzinfo = tzinfo[0].str.extract('([AP]M)([a-zA-Z]*)', expand=True) - tzinfo.columns = ["AM/PM", "TZ"] - # - combine and parse - dates[cn] = dates[cn] + ' ' + tzinfo["AM/PM"] - dates[cn] = pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p") - # - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info': - self._quote.proxy = proxy or self.proxy - tz = self._get_ticker_tz(proxy=proxy, timeout=30) - dates[cn] = dates[cn].dt.tz_localize(tz) + def map_date(time_str: str): + tz_match = re.search('([AP]M)([a-zA-Z]*)$', time_str) + tz_str = tz_match.group(2).strip() + # - remove AM/PM and timezone from date string + time_str = time_str.replace(tz_str, "") + try: + tz = pytz.timezone(tz_str) + except pytz.UnknownTimeZoneError: + tz = 
get_ticker_tz() + + return pd.to_datetime(time_str, format="%b %d, %Y, %I %p").tz_localize(tz) + + dates[cn] = dates[cn].map(map_date) dates = dates.set_index("Earnings Date") self._earnings_dates[limit] = dates From 9a37c7f0f60332033fb33be1f3d87ab11fe3024f Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Sun, 18 Aug 2024 20:12:43 +0100 Subject: [PATCH 2/4] Handle ambiguous timezones so no fallback needed. Also rearrange for cleaner merge --- yfinance/base.py | 62 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index ec8847803..4f25c1d0c 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -22,14 +22,12 @@ from __future__ import print_function import json as _json -import re import warnings from io import StringIO from typing import Optional, Union from urllib.parse import quote as urlencode import pandas as pd -import pytz import requests from . import utils, cache @@ -599,20 +597,58 @@ def get_ticker_tz(): # Parse earnings date string cn = "Earnings Date" + # - remove AM/PM and timezone from date string + tzinfo = dates[cn].str.extract('([AP]M[a-zA-Z]*)$') + dates[cn] = dates[cn].replace(' [AP]M[a-zA-Z]*$', '', regex=True) + # - split AM/PM from timezone + tzinfo = tzinfo[0].str.extract('([AP]M)([a-zA-Z]*)', expand=True) + tzinfo.columns = ["AM/PM", "TZ"] + # - combine and parse + dates[cn] = dates[cn] + ' ' + tzinfo["AM/PM"] + dates[cn] = pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p") + + # Try to remap all ambiguous timezone values: + tzinfo['TZ'] = tzinfo['TZ'].str.replace('BST', 'Europe/London') + tzinfo['TZ'] = tzinfo['TZ'].str.replace('GMT', 'Europe/London') + if '.' 
not in self.ticker: + tzinfo['TZ'] = tzinfo['TZ'].str.replace('EST', 'America/New_York') + elif self.ticker.endswith(".AX"): + tzinfo['TZ'] = tzinfo['TZ'].str.replace('EST', 'Australia/Sydney') + tzinfo['TZ'] = tzinfo['TZ'].str.replace('MST', 'America/Denver') + tzinfo['TZ'] = tzinfo['TZ'].str.replace('PST', 'America/Los_Angeles') + if'.' not in self.ticker: + tzinfo['TZ'] = tzinfo['TZ'].str.replace('CST', 'America/Chicago') + else: + # Revisit if Cuba get a stock exchange + tzinfo['TZ'] = tzinfo['TZ'].str.replace('CST', 'Asia/Shanghai') + if self.ticker.endswith('.TA'): + tzinfo['TZ'] = tzinfo['TZ'].str.replace('IST', 'Asia/Jerusalem') + elif self.ticker.endswith('.IR'): + tzinfo['TZ'] = tzinfo['TZ'].str.replace('IST', 'Europe/Dublin') + elif self.ticker.endswith('.NS'): + tzinfo['TZ'] = tzinfo['TZ'].str.replace('IST', 'Asia/Kolkata') + + # But in case still ambiguity that pytz cannot parse, have a backup: + self._quote.proxy = proxy or self.proxy + tz_backup = self._get_ticker_tz(proxy=proxy, timeout=30) - def map_date(time_str: str): - tz_match = re.search('([AP]M)([a-zA-Z]*)$', time_str) - tz_str = tz_match.group(2).strip() - # - remove AM/PM and timezone from date string - time_str = time_str.replace(tz_str, "") + if len(tzinfo['TZ'].unique())==1: try: - tz = pytz.timezone(tz_str) - except pytz.UnknownTimeZoneError: - tz = get_ticker_tz() - - return pd.to_datetime(time_str, format="%b %d, %Y, %I %p").tz_localize(tz) + dates[cn] = dates[cn].dt.tz_localize(tzinfo['TZ'].iloc[0]) + except Exception: + dates[cn] = dates[cn].dt.tz_localize(tz_backup) + else: + dates2 = [] + for i in range(len(dates)): + dt = dates[cn].iloc[i] + tz = tzinfo['TZ'].iloc[i] + try: + dt = dt.tz_localize(tz) + except Exception: + dt = dt.tz_localize(tz_backup) + dates2.append(dt) + dates[cn] = pd.to_datetime(dates2) - dates[cn] = dates[cn].map(map_date) dates = dates.set_index("Earnings Date") self._earnings_dates[limit] = dates From 30449a536de9306c5604d2925723e462ce07e05f Mon Sep 17 
00:00:00 2001 From: ValueRaider Date: Thu, 22 Aug 2024 21:22:57 +0100 Subject: [PATCH 3/4] Fix handling different TZs in earnings dates --- yfinance/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yfinance/base.py b/yfinance/base.py index 4f25c1d0c..58c21738d 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -647,7 +647,7 @@ def get_ticker_tz(): except Exception: dt = dt.tz_localize(tz_backup) dates2.append(dt) - dates[cn] = pd.to_datetime(dates2) + dates[cn] = pd.to_datetime(dates2, utc=True).tz_convert(dates2[0].tzinfo) dates = dates.set_index("Earnings Date") From 0c89fcd8f74764599c8a524f938f3f38ce757fd9 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Tue, 19 Nov 2024 21:41:42 +0000 Subject: [PATCH 4/4] Version 0.2.50 --- CHANGELOG.rst | 9 +++++++++ meta.yaml | 2 +- yfinance/version.py | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 56cc0234c..d44aa13ae 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,15 @@ Change Log =========== +0.2.50 +------ +Fixes: +- price repair #2111 #2139 +- download() appearance 2109 +- isin() error #2099 +- growth_estimates #2127 +Also new docs #2132 + 0.2.49 ------ Fix prices-clean rarely discarding good data #2122 diff --git a/meta.yaml b/meta.yaml index 444528f93..aaea9148e 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,5 +1,5 @@ {% set name = "yfinance" %} -{% set version = "0.2.49" %} +{% set version = "0.2.50" %} package: name: "{{ name|lower }}" diff --git a/yfinance/version.py b/yfinance/version.py index 761445011..756b7abb1 100644 --- a/yfinance/version.py +++ b/yfinance/version.py @@ -1 +1 @@ -version = "0.2.49" +version = "0.2.50"