dgunning · dgunning · Oct 8, 2024 · Oct 7, 2024
diff --git a/edgar/reference/tickers.py b/edgar/reference/tickers.py
@@ -1,4 +1,5 @@
 import re
+import json
 from functools import lru_cache
 from io import StringIO
 from typing import Optional, Union, List
@@ -34,15 +35,37 @@ def cusip_ticker_mapping(allow_duplicate_cusips: bool = True) -> pd.DataFrame:
 
 @lru_cache(maxsize=None)
 def get_cik_tickers():
-    source = StringIO(download_file("https://www.sec.gov/include/ticker.txt", as_text=True))
-    data = pd.read_csv(source,
-                       sep='\t',
-                       header=None,
-                       names=['ticker', 'cik']).dropna()
+    """
+    Return a DataFrame of ticker to CIK mappings published by the SEC.
+
+    The tab-separated ticker.txt file is tried first. If downloading or
+    parsing it fails for any reason, company_tickers.json is used instead.
+    The result has the columns ['ticker', 'cik'] with upper-cased tickers,
+    and it is memoized by lru_cache.
+    """
+    try:
+        source = StringIO(download_file("https://www.sec.gov/include/ticker.txt", as_text=True))
+        data = pd.read_csv(source,
+                           sep='\t',
+                           header=None,
+                           names=['ticker', 'cik']).dropna()
+    except Exception:
+        # Deliberately broad: any download or parse failure of the primary
+        # source should fall through to the JSON endpoint.
+        json_data = json.loads(download_file("https://www.sec.gov/files/company_tickers.json", as_text=True))
+        # The JSON maps row keys ("0", "1", ...) to records that carry
+        # 'cik_str' and 'ticker'; only 'cik_str' needs renaming.
+        data = pd.DataFrame.from_dict(json_data, orient='index').rename(columns={'cik_str': 'cik'})
+        # Keep just the two columns and drop the string row keys so the
+        # frame has a plain positional index.
+        data = data[['ticker', 'cik']].reset_index(drop=True)
+
+        # Ensure CIK is treated as an integer
+        data['cik'] = data['cik'].astype(int)
 
     # Convert tickers to uppercase
     data['ticker'] = data['ticker'].str.upper()
 
     return data
 
 

diff --git a/tests/test_reference.py b/tests/test_reference.py
@@ -1,7 +1,9 @@
 from edgar.reference import cusip_ticker_mapping, get_ticker_from_cusip, describe_form
 from edgar.reference.tickers import get_cik_tickers, find_cik, get_company_ticker_name_exchange, \
     get_companies_by_exchange, get_mutual_fund_tickers, find_mutual_fund_cik
-
+import json
+from unittest.mock import patch
+import pandas as pd
 
 def test_cusip_ticker_mapping():
     data = cusip_ticker_mapping()
@@ -89,3 +91,53 @@ def test_get_mutual_fund_tickers():
     assert data.columns.tolist() == ['cik', 'seriesId', 'classId', 'ticker']
 
     assert not data.query("ticker == 'CRBRX'").empty
+
+
+def test_get_cik_tickers():
+    """
+    Check get_cik_tickers with real downloads, then check the JSON fallback
+    with download_file mocked.
+    """
+    # Test normal behavior
+    data = get_cik_tickers()
+    assert isinstance(data, pd.DataFrame), "Result should be a pandas DataFrame"
+    assert set(data.columns) == {'ticker', 'cik'}, f"Columns should be 'ticker' and 'cik', got {data.columns}"
+    assert len(data) > 0, "DataFrame should not be empty"
+
+    # Test a few known tickers
+    apple = data[data['ticker'] == 'AAPL']
+    assert len(apple) == 1, "There should be exactly one entry for AAPL"
+    assert apple.iloc[0]['cik'] == 320193, f"AAPL CIK should be '320193', got {apple.iloc[0]['cik']}"
+
+    microsoft = data[data['ticker'] == 'MSFT']
+    assert len(microsoft) == 1, "There should be exactly one entry for MSFT"
+    assert microsoft.iloc[0]['cik'] == 789019, f"MSFT CIK should be '789019', got {microsoft.iloc[0]['cik']}"
+
+    # Test fallback mechanism
+    with patch('edgar.reference.tickers.download_file') as mock_download:
+        # First call raises an exception (primary URL fails)
+        # Second call returns valid JSON data (fallback URL succeeds)
+        mock_download.side_effect = [
+            Exception("Primary URL failed"),
+            json.dumps({
+                "0": {"cik_str": 320193, "ticker": "AAPL", "title": "Apple Inc."},
+                "1": {"cik_str": 789019, "ticker": "MSFT", "title": "MICROSOFT CORP"}
+            })
+        ]
+
+        # Clear the lru_cache to ensure we're not getting cached results
+        get_cik_tickers.cache_clear()
+        try:
+            fallback_data = get_cik_tickers()
+            assert isinstance(fallback_data, pd.DataFrame), "Fallback result should be a pandas DataFrame"
+            assert set(fallback_data.columns) == {'ticker', 'cik'}, f"Fallback columns should be 'ticker' and 'cik', got {fallback_data.columns}"
+            assert len(fallback_data) == 2, f"Fallback data should have 2 entries, got {len(fallback_data)}"
+            assert fallback_data['ticker'].tolist() == ['AAPL', 'MSFT'], f"Fallback tickers should be ['AAPL', 'MSFT'], got {fallback_data['ticker'].tolist()}"
+            assert fallback_data['cik'].tolist() == [320193, 789019], f"Fallback CIKs should be [320193, 789019], got {fallback_data['cik'].tolist()}"
+
+            # Verify that download_file was called twice
+            assert mock_download.call_count == 2, f"download_file should be called twice, was called {mock_download.call_count} times"
+        finally:
+            # The mocked two-row frame is now memoized; drop it so later
+            # tests that call get_cik_tickers() do not see the fake data.
+            get_cik_tickers.cache_clear()