Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a fallback for the CIK lookup. #123

Merged
merged 1 commit into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions edgar/reference/tickers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import json
from functools import lru_cache
from io import StringIO
from typing import Optional, Union, List
Expand Down Expand Up @@ -34,15 +35,23 @@ def cusip_ticker_mapping(allow_duplicate_cusips: bool = True) -> pd.DataFrame:

@lru_cache(maxsize=None)
def get_cik_tickers():
    """
    Return the SEC ticker-to-CIK mapping as a DataFrame.

    The tab-separated ``ticker.txt`` listing is tried first. If downloading or
    parsing it fails for any reason, the ``company_tickers.json`` listing is
    used instead. Either way the result has exactly two columns: ``ticker``
    (upper-cased string) and ``cik`` (integer).

    The result is memoized by ``lru_cache``, so every caller receives the same
    DataFrame object; copy it before mutating. Call
    ``get_cik_tickers.cache_clear()`` to force a fresh download.
    """
    try:
        source = StringIO(download_file("https://www.sec.gov/include/ticker.txt", as_text=True))
        data = pd.read_csv(source,
                           sep='\t',
                           header=None,
                           names=['ticker', 'cik'],
                           dtype={'ticker': str},
                           # Treat only blank fields as missing. With pandas' default NA
                           # strings a ticker spelled "nan" or "null" would be parsed as
                           # NaN and then silently discarded by dropna() below.
                           keep_default_na=False,
                           na_values=[''])
    except Exception:
        # Deliberately broad: any download or parse failure of the primary
        # source falls back to the JSON data from the alternative URL.
        json_data = json.loads(download_file("https://www.sec.gov/files/company_tickers.json", as_text=True))
        data = pd.DataFrame.from_dict(json_data, orient='index')
        data = data.rename(columns={'cik_str': 'cik'})

    # Same clean-up for both sources: keep the two columns of interest and drop
    # incomplete rows. The explicit copy makes the column assignments below
    # operate on an independent frame rather than on a derived selection.
    data = data[['ticker', 'cik']].dropna().copy()

    # Ensure CIK is treated as an integer
    data['cik'] = data['cik'].astype(int)

    # Convert tickers to uppercase
    data['ticker'] = data['ticker'].str.upper()

    return data


Expand Down
46 changes: 45 additions & 1 deletion tests/test_reference.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from edgar.reference import cusip_ticker_mapping, get_ticker_from_cusip, describe_form
from edgar.reference.tickers import get_cik_tickers, find_cik, get_company_ticker_name_exchange, \
get_companies_by_exchange, get_mutual_fund_tickers, find_mutual_fund_cik

import json
from unittest.mock import patch
import pandas as pd

def test_cusip_ticker_mapping():
data = cusip_ticker_mapping()
Expand Down Expand Up @@ -89,3 +91,45 @@ def test_get_mutual_fund_tickers():
assert data.columns.tolist() == ['cik', 'seriesId', 'classId', 'ticker']

assert not data.query("ticker == 'CRBRX'").empty


def test_get_cik_tickers():
    """
    Check get_cik_tickers against the live source and against the JSON fallback.

    The fallback part patches ``download_file`` so the first call raises and the
    second call returns a small JSON payload. Because ``get_cik_tickers`` is
    memoized, the cache is cleared before the mocked call (to force a real
    invocation) and again afterwards (so the two-row mocked table is not served
    to tests that run later).
    """
    # Test normal behavior
    data = get_cik_tickers()
    assert isinstance(data, pd.DataFrame), "Result should be a pandas DataFrame"
    assert set(data.columns) == {'ticker', 'cik'}, f"Columns should be 'ticker' and 'cik', got {data.columns}"
    assert len(data) > 0, "DataFrame should not be empty"

    # Test a few known tickers
    apple = data[data['ticker'] == 'AAPL']
    assert len(apple) == 1, "There should be exactly one entry for AAPL"
    assert apple.iloc[0]['cik'] == 320193, f"AAPL CIK should be '320193', got {apple.iloc[0]['cik']}"

    microsoft = data[data['ticker'] == 'MSFT']
    assert len(microsoft) == 1, "There should be exactly one entry for MSFT"
    assert microsoft.iloc[0]['cik'] == 789019, f"MSFT CIK should be '789019', got {microsoft.iloc[0]['cik']}"

    # Test fallback mechanism
    with patch('edgar.reference.tickers.download_file') as mock_download:
        # First call raises an exception (primary URL fails)
        # Second call returns valid JSON data (fallback URL succeeds)
        mock_download.side_effect = [
            Exception("Primary URL failed"),
            json.dumps({
                "0": {"cik_str": 320193, "ticker": "AAPL", "title": "Apple Inc."},
                "1": {"cik_str": 789019, "ticker": "MSFT", "title": "MICROSOFT CORP"}
            })
        ]

        # Clear the lru_cache to ensure we're not getting cached results
        get_cik_tickers.cache_clear()

        try:
            fallback_data = get_cik_tickers()
            assert isinstance(fallback_data, pd.DataFrame), "Fallback result should be a pandas DataFrame"
            assert set(fallback_data.columns) == {'ticker', 'cik'}, f"Fallback columns should be 'ticker' and 'cik', got {fallback_data.columns}"
            assert len(fallback_data) == 2, f"Fallback data should have 2 entries, got {len(fallback_data)}"
            assert fallback_data['ticker'].tolist() == ['AAPL', 'MSFT'], f"Fallback tickers should be ['AAPL', 'MSFT'], got {fallback_data['ticker'].tolist()}"
            assert fallback_data['cik'].tolist() == [320193, 789019], f"Fallback CIKs should be [320193, 789019], got {fallback_data['cik'].tolist()}"

            # Verify that download_file was called twice
            assert mock_download.call_count == 2, f"download_file should be called twice, was called {mock_download.call_count} times"
        finally:
            # Discard the mocked result so it does not leak into other tests
            # through the lru_cache on get_cik_tickers.
            get_cik_tickers.cache_clear()
Loading