From dc957eeb0ede9efee641192a6a9a60c3480d0f71 Mon Sep 17 00:00:00 2001 From: "Julia L. Wang" Date: Sun, 10 Dec 2023 12:39:50 -0500 Subject: [PATCH] Implementation of holders data --- README.md | 3 + tests/ticker.py | 27 ++++ yfinance/base.py | 24 ++++ yfinance/scrapers/holders.py | 232 +++++++++++++++++++++++++++++------ yfinance/ticker.py | 12 ++ 5 files changed, 262 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 6e356998a..862afad5c 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,9 @@ msft.quarterly_cashflow msft.major_holders msft.institutional_holders msft.mutualfund_holders +msft.insider_transactions +msft.insider_purchases +msft.insider_roster_holders # Show future and historic earnings dates, returns at most next 4 quarters and last 8 quarters by default. # Note: If more are needed use msft.get_earnings_dates(limit=XX) with increased limit argument. diff --git a/tests/ticker.py b/tests/ticker.py index 8f41cca65..e1678ab63 100644 --- a/tests/ticker.py +++ b/tests/ticker.py @@ -24,6 +24,9 @@ ("major_holders", pd.DataFrame), ("institutional_holders", pd.DataFrame), ("mutualfund_holders", pd.DataFrame), + ("insider_transactions", pd.DataFrame), + ("insider_purchases", pd.DataFrame), + ("insider_roster_holders", pd.DataFrame), ("splits", pd.Series), ("actions", pd.DataFrame), ("shares", pd.DataFrame), @@ -338,6 +341,30 @@ def test_mutualfund_holders(self): data_cached = self.ticker.mutualfund_holders self.assertIs(data, data_cached, "data not cached") + def test_insider_transactions(self): + data = self.ticker.insider_transactions + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_transactions + self.assertIs(data, data_cached, "data not cached") + + def test_insider_purchases(self): + data = self.ticker.insider_purchases + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_purchases + self.assertIs(data, data_cached, "data not cached") + + def test_insider_roster_holders(self): + data = self.ticker.insider_roster_holders + self.assertIsInstance(data, pd.DataFrame, "data has wrong type") + self.assertFalse(data.empty, "data is empty") + + data_cached = self.ticker.insider_roster_holders + self.assertIs(data, data_cached, "data not cached") + class TestTickerMiscFinancials(unittest.TestCase): session = None diff --git a/yfinance/base.py b/yfinance/base.py index 66cc7aaf9..0e0c2034b 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -1747,6 +1747,30 @@ def get_mutualfund_holders(self, proxy=None, as_dict=False): if as_dict: return data.to_dict() return data + + def get_insider_purchases(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_purchases + if data is not None: + if as_dict: + return data.to_dict() + return data + + def get_insider_transactions(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_transactions + if data is not None: + if as_dict: + return data.to_dict() + return data + + def get_insider_roster_holders(self, proxy=None, as_dict=False): + self._holders.proxy = proxy or self.proxy + data = self._holders.insider_roster + if data is not None: + if as_dict: + return data.to_dict() + return data def get_info(self, proxy=None) -> dict: self._quote.proxy = proxy or self.proxy diff --git a/yfinance/scrapers/holders.py b/yfinance/scrapers/holders.py index 1376d41a1..90db8bae9 100644 --- a/yfinance/scrapers/holders.py +++ b/yfinance/scrapers/holders.py @@ -1,8 +1,12 @@ -from io import StringIO +# from io import StringIO import pandas as pd from yfinance.data import YfData +from yfinance.const import _BASE_URL_ +from yfinance.exceptions import YFinanceDataException + +_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/" class Holders: @@ -14,57 +18,213 @@ def __init__(self, data: YfData, symbol: str, proxy=None): self.proxy = proxy self._major = None + self._major_direct_holders = None self._institutional = None self._mutualfund = None + self._insider_transactions = None + self._insider_purchases = None + self._insider_roster = None + @property def major(self) -> pd.DataFrame: if self._major is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._major @property def institutional(self) -> pd.DataFrame: if self._institutional is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._institutional @property def mutualfund(self) -> pd.DataFrame: if self._mutualfund is None: - self._scrape(self.proxy) + # self._scrape(self.proxy) + self._fetch_and_parse() return self._mutualfund - def _scrape(self, proxy): - ticker_url = f"{self._SCRAPE_URL_}/{self._symbol}" + @property + def insider_transactions(self) -> pd.DataFrame: + if self._insider_transactions is None: + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() + return self._insider_transactions + + @property + def insider_purchases(self) -> pd.DataFrame: + if self._insider_purchases is None: + # self._scrape_insider_transactions(self.proxy) + self._fetch_and_parse() + return self._insider_purchases + + @property + def insider_roster(self) -> pd.DataFrame: + if self._insider_roster is None: + # self._scrape_insider_ros(self.proxy) + self._fetch_and_parse() + return self._insider_roster + + def _fetch(self, proxy): + modules = ','.join( + ["institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"]) + params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol, "formatted": "false"} + result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_, user_agent_headers=self._data.user_agent_headers, params=params_dict, proxy=proxy) + return result + + def _fetch_and_parse(self): + result = self._fetch(self.proxy) try: - resp = self._data.cache_get(ticker_url + '/holders', proxy=proxy) - holders = pd.read_html(StringIO(resp.text)) - except Exception: - holders = [] - - if len(holders) >= 3: - self._major = holders[0] - self._institutional = holders[1] - self._mutualfund = holders[2] - elif len(holders) >= 2: - self._major = holders[0] - self._institutional = holders[1] - elif len(holders) >= 1: - self._major = holders[0] - - if self._institutional is not None: - if 'Date Reported' in self._institutional: - self._institutional['Date Reported'] = pd.to_datetime( - self._institutional['Date Reported']) - if '% Out' in self._institutional: - self._institutional['% Out'] = self._institutional[ - '% Out'].str.replace('%', '').astype(float) / 100 - - if self._mutualfund is not None: - if 'Date Reported' in self._mutualfund: - self._mutualfund['Date Reported'] = pd.to_datetime( - self._mutualfund['Date Reported']) - if '% Out' in self._mutualfund: - self._mutualfund['% Out'] = self._mutualfund[ - '% Out'].str.replace('%', '').astype(float) / 100 + data = result["quoteSummary"]["result"][0] + # parse "institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity" + self._parse_institution_ownership(data["institutionOwnership"]) + self._parse_fund_ownership(data["fundOwnership"]) + # self._parse_major_direct_holders(data["majorDirectHolders"]) # need more data to investigate + self._parse_major_holders_breakdown(data["majorHoldersBreakdown"]) + self._parse_insider_transactions(data["insiderTransactions"]) + self._parse_insider_holders(data["insiderHolders"]) + self._parse_net_share_purchase_activity(data["netSharePurchaseActivity"]) + except (KeyError, IndexError): + raise YFinanceDataException("Failed to parse holders json data.") + + @staticmethod + def _parse_raw_values(data): + if isinstance(data, dict) and "raw" in data: + return data["raw"] + return data + + def _parse_institution_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) # "pctHeld": "% Out" + self._institutional = df + + def _parse_fund_ownership(self, data): + holders = data["ownershipList"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True) + self._mutualfund = df + + def _parse_major_direct_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s") + df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "positionDirect": "Shares", "valueDirect": "Value"}, inplace=True) + self._major_direct_holders = df + + def _parse_major_holders_breakdown(self, data): + if "maxAge" in data: + del data["maxAge"] + df = pd.DataFrame.from_dict(data, orient="index") + if not df.empty: + df.columns.name = "Breakdown" + df.rename(columns={df.columns[0]: 'Value'}, inplace=True) + self._major = df + + def _parse_insider_transactions(self, data): + holders = data["transactions"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["startDate"] = pd.to_datetime(df["startDate"], unit="s") + df.rename(columns={ + "startDate": "Start Date", + "filerName": "Insider", + "filerRelation": "Position", + "filerUrl": "URL", + "moneyText": "Transaction", + "transactionText": "Text", + "shares": "Shares", + "value": "Value", + "ownership": "Ownership" # ownership flag, direct or institutional + }, inplace=True) + self._insider_transactions = df + + def _parse_insider_holders(self, data): + holders = data["holders"] + for owner in holders: + for k, v in owner.items(): + owner[k] = self._parse_raw_values(v) + del owner["maxAge"] + df = pd.DataFrame(holders) + if not df.empty: + df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s") + df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s") + + df.rename(columns={ + "name": "Name", + "relation": "Position", + "url": "URL", + "transactionDescription": "Most Recent Transaction", + "latestTransDate": "Latest Transaction Date", + "positionDirectDate": "Position Direct Date", + "positionDirect": "Shares Owned Directly", + "positionIndirectDate": "Position Indirect Date", + "positionIndirect": "Shares Owned Indirectly" + }, inplace=True) + + df["Name"] = df["Name"].astype(str) + df["Position"] = df["Position"].astype(str) + df["URL"] = df["URL"].astype(str) + df["Most Recent Transaction"] = df["Most Recent Transaction"].astype(str) + + self._insider_roster = df + + def _parse_net_share_purchase_activity(self, data): + df = pd.DataFrame( + { + "Insider Purchases Last " + data.get("period", ""): [ + "Purchases", + "Sales", + "Net Shares Purchased (Sold)", + "Total Insider Shares Held", + "% Net Shares Purchased (Sold)", + "% Buy Shares", + "% Sell Shares" + ], + "Shares": [ + data.get('buyInfoShares'), + data.get('sellInfoShares'), + data.get('netInfoShares'), + data.get('totalInsiderShares'), + data.get('netPercentInsiderShares'), + data.get('buyPercentInsiderShares'), + data.get('sellPercentInsiderShares') + ], + "Trans": [ + data.get('buyInfoCount'), + data.get('sellInfoCount'), + data.get('netInfoCount'), + pd.NA, + pd.NA, + pd.NA, + pd.NA + ] + } + ).convert_dtypes() + self._insider_purchases = df + + \ No newline at end of file diff --git a/yfinance/ticker.py b/yfinance/ticker.py index af8dd750c..580481ccd 100644 --- a/yfinance/ticker.py +++ b/yfinance/ticker.py @@ -117,6 +117,18 @@ def institutional_holders(self) -> _pd.DataFrame: def mutualfund_holders(self) -> _pd.DataFrame: return self.get_mutualfund_holders() + @property + def insider_purchases(self) -> _pd.DataFrame: + return self.get_insider_purchases() + + @property + def insider_transactions(self) -> _pd.DataFrame: + return self.get_insider_transactions() + + @property + def insider_roster_holders(self) -> _pd.DataFrame: + return self.get_insider_roster_holders() + @property def dividends(self) -> _pd.Series: return self.get_dividends()