class TestTickerFundsData(unittest.TestCase):
    """Checks for the ``Ticker.funds_data`` accessors.

    Every test runs against one equity ETF, one bond ETF and one mutual fund
    so that each accessor is exercised on all three kinds of fund.
    """

    session = None

    @classmethod
    def setUpClass(cls):
        cls.session = session_gbl

    @classmethod
    def tearDownClass(cls):
        if cls.session is not None:
            cls.session.close()

    def setUp(self):
        self.test_tickers = [
            yf.Ticker("SPY", session=self.session),    # equity etf
            yf.Ticker("JNK", session=self.session),    # bonds etf
            yf.Ticker("VTSAX", session=self.session),  # mutual fund
        ]

    def tearDown(self):
        # Release the tickers built in setUp (the original reset an unrelated,
        # never-assigned ``self.ticker`` attribute).
        self.test_tickers = None

    def _assert_dataframe_accessor(self, attribute):
        """Assert that ``funds_data.<attribute>`` is a DataFrame for every test ticker."""
        for ticker in self.test_tickers:
            # subTest names the failing symbol and keeps checking the others.
            with self.subTest(ticker=ticker.ticker, attribute=attribute):
                value = getattr(ticker.funds_data, attribute)
                self.assertIsInstance(value, pd.DataFrame)

    def test_fetch_and_parse(self):
        # Any exception raised here fails the test with its full traceback;
        # no need to catch it and re-report it through self.fail().
        for ticker in self.test_tickers:
            with self.subTest(ticker=ticker.ticker):
                ticker.funds_data._fetch_and_parse()

        # A stock is not a fund, so parsing must be rejected. Only the call
        # under test lives inside assertRaises, which already fails the test
        # when nothing is raised.
        stock = yf.Ticker("AAPL", session=self.session)
        with self.assertRaises(YFDataException):
            stock.funds_data._fetch_and_parse()

    def test_description(self):
        for ticker in self.test_tickers:
            with self.subTest(ticker=ticker.ticker):
                description = ticker.funds_data.description
                self.assertIsInstance(description, str)
                self.assertTrue(len(description) > 0)

    def test_fund_overview(self):
        self._assert_dataframe_accessor("fund_overview")

    def test_fund_operations(self):
        self._assert_dataframe_accessor("fund_operations")

    def test_asset_classes(self):
        self._assert_dataframe_accessor("asset_classes")

    def test_top_holdings(self):
        self._assert_dataframe_accessor("top_holdings")

    def test_equity_holdings(self):
        self._assert_dataframe_accessor("equity_holdings")

    def test_bond_holdings(self):
        self._assert_dataframe_accessor("bond_holdings")

    def test_bond_ratings(self):
        self._assert_dataframe_accessor("bond_ratings")

    def test_sector_weightings(self):
        self._assert_dataframe_accessor("sector_weightings")
class FundsData:
    """Lazily loaded ETF and mutual-fund data from the quoteSummary endpoint.

    Queried modules: quoteType, summaryProfile, fundProfile, topHoldings.

    Notes:
    - The fundPerformance module is not implemented, as better data is
      queryable using history.
    - Nothing is requested at construction time. Reading an accessor whose
      cached value is still ``None`` triggers ``_fetch_and_parse``.
    """

    def __init__(self, data: "YfData", symbol: str, proxy=None):
        """Store the data client, the symbol and the proxy; fetch nothing yet.

        Args:
            data: Client object whose ``get_raw_json`` performs the request.
            symbol: Symbol to query; also used as a column label in the
                comparison tables.
            proxy: Passed through unchanged to ``get_raw_json``.
        """
        self._data = data
        self._symbol = symbol
        self.proxy = proxy

        # quoteType
        self._quote_type = None

        # summaryProfile
        self._description = None

        # fundProfile
        self._fund_overview = None
        self._fund_operations = None

        # topHoldings
        self._asset_classes = None
        self._top_holdings = None
        self._equity_holdings = None
        self._bond_holdings = None
        self._bond_ratings = None
        self._sector_weightings = None

        # fundPerformance (only filled by _parse_fund_performance, which the
        # fetch path does not call)
        self._performance = None

    def quote_type(self) -> str:
        """Return the quote type reported by the quoteType module.

        Unlike the other accessors this is a plain method, not a property;
        it is left that way so existing ``quote_type()`` calls keep working.
        """
        if self._quote_type is None:
            self._fetch_and_parse()
        return self._quote_type

    @property
    def description(self) -> str:
        """The ``longBusinessSummary`` text, or ``""`` when it is absent."""
        if self._description is None:
            self._fetch_and_parse()
        return self._description

    @property
    def fund_overview(self) -> pd.DataFrame:
        """Category, family and legal type as a Data/Value table."""
        if self._fund_overview is None:
            self._fetch_and_parse()
        return self._fund_overview

    @property
    def fund_operations(self) -> pd.DataFrame:
        """Expense ratio, turnover and net assets next to the category values."""
        if self._fund_operations is None:
            self._fetch_and_parse()
        return self._fund_operations

    @property
    def asset_classes(self) -> pd.DataFrame:
        """Cash/stock/bond/preferred/convertible/other positions."""
        if self._asset_classes is None:
            self._fetch_and_parse()
        return self._asset_classes

    @property
    def top_holdings(self) -> pd.DataFrame:
        """Symbol, name and holding percent of each listed holding."""
        if self._top_holdings is None:
            self._fetch_and_parse()
        return self._top_holdings

    @property
    def equity_holdings(self) -> pd.DataFrame:
        """Equity valuation averages next to the category averages."""
        if self._equity_holdings is None:
            self._fetch_and_parse()
        return self._equity_holdings

    @property
    def bond_holdings(self) -> pd.DataFrame:
        """Duration, maturity and credit quality next to the category averages."""
        if self._bond_holdings is None:
            self._fetch_and_parse()
        return self._bond_holdings

    @property
    def bond_ratings(self) -> pd.DataFrame:
        """Rating/Value table; an empty DataFrame when no ratings are reported."""
        if self._bond_ratings is None:
            self._fetch_and_parse()
        return self._bond_ratings

    @property
    def sector_weightings(self) -> pd.DataFrame:
        """Sector/Value table; an empty DataFrame when no weightings are reported."""
        if self._sector_weightings is None:
            self._fetch_and_parse()
        return self._sector_weightings

    def _fetch(self, proxy):
        """Request the quoteSummary modules for this symbol and return the raw JSON."""
        # fundPerformance is deliberately not requested: nothing in the fetch
        # path consumes it, so asking for it only enlarges the response.
        modules = ",".join(["quoteType", "summaryProfile", "topHoldings", "fundProfile"])
        params_dict = {
            "modules": modules,
            "corsDomain": "finance.yahoo.com",
            "symbol": self._symbol,
            "formatted": "false",
        }
        return self._data.get_raw_json(
            _QUOTE_SUMMARY_URL_ + self._symbol,
            user_agent_headers=self._data.user_agent_headers,
            params=params_dict,
            proxy=proxy,
        )

    def _fetch_and_parse(self) -> None:
        """Fetch the summary and fill every cached attribute.

        Raises:
            YFDataException: If the response lacks a required module (which
                is what happens for a symbol that is not a fund) or is
                otherwise not shaped as expected. The underlying error is
                chained as ``__cause__``.
        """
        result = self._fetch(self.proxy)
        try:
            data = result["quoteSummary"]["result"][0]
            # check quote type
            self._quote_type = data["quoteType"]["quoteType"]

            # parse "summaryProfile", "topHoldings", "fundProfile"
            self._parse_description(data["summaryProfile"])
            self._parse_top_holdings(data["topHoldings"])
            self._parse_fund_profile(data["fundProfile"])
        except KeyError as err:
            raise YFDataException("Failed to parse quote type. No Fund data found.") from err
        except (IndexError, TypeError) as err:
            # TypeError covers a null ``result`` (or a null response), which
            # would otherwise surface as "'NoneType' is not subscriptable".
            raise YFDataException("Failed to parse fund json data.") from err

    @staticmethod
    def _parse_raw_values(data):
        """Return ``data["raw"]`` for dict input (``pd.NA`` if absent), else ``data`` itself."""
        if not isinstance(data, dict):
            return data

        return data.get("raw", pd.NA)

    def _raw_column(self, section, keys):
        """Look up each key in ``section`` and unwrap it with ``_parse_raw_values``."""
        return [self._parse_raw_values(section.get(key, {})) for key in keys]

    @staticmethod
    def _weights_frame(entries, label):
        """Flatten a list of single-entry dicts into a ``label``/"Value" table.

        Returns an empty DataFrame when there is nothing to flatten.
        """
        flat = {key: value for entry in (entries or []) for key, value in entry.items()}
        if not flat:
            return pd.DataFrame()
        return pd.DataFrame({
            label: list(flat),
            "Value": list(flat.values()),
        }).convert_dtypes()

    def _parse_description(self, data) -> None:
        """Cache the long business summary from the summaryProfile module."""
        self._description = data.get("longBusinessSummary", "")

    def _parse_top_holdings(self, data) -> None:
        """Build every table that is derived from the topHoldings module."""
        # asset classes
        self._asset_classes = pd.DataFrame({
            "Asset Class": ["Cash", "Stock", "Bond", "Preferred", "Convertible", "Others"],
            "% of AUM": self._raw_column(data, [
                "cashPosition", "stockPosition", "bondPosition",
                "preferredPosition", "convertiblePosition", "otherPosition",
            ]),
        }).convert_dtypes()

        # top holdings; ``or []`` also covers an explicit null, and ``.get``
        # keeps one incomplete entry from aborting the whole parse
        holdings = data.get("holdings") or []
        self._top_holdings = pd.DataFrame({
            "Symbol": [item.get("symbol", pd.NA) for item in holdings],
            "Name": [item.get("holdingName", pd.NA) for item in holdings],
            "Holding Percent": [
                self._parse_raw_values(item.get("holdingPercent", {})) for item in holdings
            ],
        }).convert_dtypes()

        # equity holdings; category figures use the same key plus "Cat"
        equity = data.get("equityHoldings") or {}
        equity_keys = [
            "priceToEarnings", "priceToBook", "priceToSales",
            "priceToCashflow", "medianMarketCap", "threeYearEarningsGrowth",
        ]
        self._equity_holdings = pd.DataFrame({
            "Average": ["Price/Earnings", "Price/Book", "Price/Sales", "Price/Cashflow",
                        "Median Market Cap", "3 Year Earnings Growth"],
            self._symbol: self._raw_column(equity, equity_keys),
            "Category Average": self._raw_column(equity, [key + "Cat" for key in equity_keys]),
        }).convert_dtypes()

        # bond holdings
        bonds = data.get("bondHoldings") or {}
        bond_keys = ["duration", "maturity", "creditQuality"]
        self._bond_holdings = pd.DataFrame({
            "Average": ["Duration", "Maturity", "Credit Quality"],
            self._symbol: self._raw_column(bonds, bond_keys),
            "Category Average": self._raw_column(bonds, [key + "Cat" for key in bond_keys]),
        }).convert_dtypes()

        # bond ratings and sector weightings share one list-of-dicts layout
        self._bond_ratings = self._weights_frame(data.get("bondRatings"), "Rating")
        self._sector_weightings = self._weights_frame(data.get("sectorWeightings"), "Sector")

    def _parse_fund_performance(self, data):
        """Turn a fundPerformance mapping into a one-column "Value" table.

        Not called by ``_fetch_and_parse``; kept for callers that obtain the
        module themselves. The input mapping is left untouched.
        """
        # Filter into a copy instead of deleting "maxAge" from the caller's dict.
        metrics = {key: value for key, value in data.items() if key != "maxAge"}
        df = pd.DataFrame.from_dict(metrics, orient="index")
        if not df.empty:
            df = df.rename(columns={df.columns[0]: "Value"})
            df.columns.name = "Performance"
        self._performance = df

    def _parse_fund_profile(self, data):
        """Build the overview and operations tables from the fundProfile module."""
        self._fund_overview = pd.DataFrame({
            "Data": ["Category", "Family", "Legal Type"],
            "Value": [
                data.get("categoryName", pd.NA),
                data.get("family", pd.NA),
                data.get("legalType", pd.NA),
            ],
        })

        # ``or {}`` also covers sections that are present but null
        fees = data.get("feesExpensesInvestment") or {}
        fees_cat = data.get("feesExpensesInvestmentCat") or {}
        fee_keys = ["annualReportExpenseRatio", "annualHoldingsTurnover", "totalNetAssets"]

        self._fund_operations = pd.DataFrame({
            "Attributes": ["Annual Report Expense Ratio", "Annual Holdings Turnover",
                           "Total Net Assets"],
            self._symbol: self._raw_column(fees, fee_keys),
            "Category Average": self._raw_column(fees_cat, fee_keys),
        }).convert_dtypes()