Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for reading company ticker data from local #138

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -400,11 +400,12 @@ The data is stored by default in the `~/.edgar` directory. You can change this b
```bash

```python
def download_edgar_data(submissions: bool = True, facts: bool = True):
def download_edgar_data(submissions: bool = True, facts: bool = True, reference: bool = True):
"""
Download all the company data from Edgar
:param submissions: Download all the company submissions
:param facts: Download all the company facts
:param reference: Download reference data
"""
download_edgar_data()

Expand Down
3 changes: 3 additions & 0 deletions edgar/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ def download_edgar_data(submissions: bool = True,
if facts:
from edgar.entities import download_facts
download_facts()
if reference:
from edgar.reference import download_reference_data
download_reference_data()


class InvalidDateException(Exception):
Expand Down
51 changes: 45 additions & 6 deletions edgar/reference/tickers.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import re
import os
import json
from functools import lru_cache
from io import StringIO
from pathlib import Path
from typing import Optional, Union, List
from typing import Optional, Union, List, Dict, Any

import pandas as pd
import pyarrow as pa
from httpx import HTTPStatusError

from edgar.httprequests import download_file, download_json, download_datafile
from edgar.reference.data.common import read_parquet_from_package
from edgar.core import log
from edgar.core import log, get_edgar_data_directory

__all__ = ['cusip_ticker_mapping', 'get_ticker_from_cusip', 'get_company_tickers', 'get_icon_from_ticker', 'find_cik',
'get_cik_tickers', 'get_company_ticker_name_exchange', 'get_companies_by_exchange',
Expand All @@ -37,6 +39,20 @@ def cusip_ticker_mapping(allow_duplicate_cusips: bool = True) -> pd.DataFrame:
df = df[~df.index.duplicated(keep='first')]
return df


def load_tickers_from_local() -> Optional[Dict[str, Any]]:
    """
    Read the company tickers JSON from the local Edgar data directory.

    The file is looked up in the "reference" subdirectory of the Edgar data
    directory, under the base name of ``company_tickers_json_url``.

    :return: The parsed JSON content, or None when the reference directory
             or the tickers file does not exist
    """
    local_reference_dir = get_edgar_data_directory() / "reference"
    local_tickers_path = local_reference_dir / os.path.basename(company_tickers_json_url)
    # Either the directory or the file being absent means there is no local copy
    if not (local_reference_dir.exists() and local_tickers_path.exists()):
        return None
    raw_json = local_tickers_path.read_text()
    return json.loads(raw_json)


@lru_cache(maxsize=1)
def get_company_tickers(
as_dataframe: bool = True,
Expand All @@ -63,8 +79,13 @@ def get_company_tickers(
])

try:
# Download JSON data
tickers_json = download_json(company_tickers_json_url)
if os.getenv("EDGAR_USE_LOCAL_DATA"):
tickers_json = load_tickers_from_local()
if not tickers_json:
tickers_json = download_json(company_tickers_json_url)
else:
# Download JSON data
tickers_json = download_json(company_tickers_json_url)

# Pre-allocate lists for better memory efficiency
ciks = []
Expand Down Expand Up @@ -110,18 +131,36 @@ def get_company_tickers(
log.error(f"Error fetching company tickers from [{company_tickers_json_url}]: {str(e)}")
raise

def load_cik_tickers_from_local() -> Optional[str]:
    """
    Read the ticker.txt content from the local Edgar data directory.

    The file is looked up in the "reference" subdirectory of the Edgar data
    directory, under the base name of ``ticker_txt_url``.

    :return: The file content as text, or None when the reference directory
             or the file does not exist
    """
    local_reference_dir = get_edgar_data_directory() / "reference"
    local_ticker_txt_path = local_reference_dir / os.path.basename(ticker_txt_url)
    # Either the directory or the file being absent means there is no local copy
    if not (local_reference_dir.exists() and local_ticker_txt_path.exists()):
        return None
    return local_ticker_txt_path.read_text()

def get_cik_tickers_from_ticker_txt():
    """
    Get CIK and ticker data from the ticker.txt file.

    When the EDGAR_USE_LOCAL_DATA environment variable is set, the local copy
    is tried first and the file is downloaded from ``ticker_txt_url`` only if
    no local content is available. Otherwise the file is always downloaded.

    :return: A DataFrame with 'ticker' (upper-cased) and 'cik' columns with
             rows containing missing values dropped, or None if loading or
             parsing fails (the error is logged)
    """
    try:
        ticker_txt = None
        if os.getenv("EDGAR_USE_LOCAL_DATA"):
            ticker_txt = load_cik_tickers_from_local()
        if not ticker_txt:
            # Local mode is off, or no local copy was found: use the remote file
            ticker_txt = download_file(ticker_txt_url, as_text=True)
        data = pd.read_csv(StringIO(ticker_txt),
                           sep='\t',
                           header=None,
                           names=['ticker', 'cik']).dropna()
        data['ticker'] = data['ticker'].str.upper()
        return data
    except Exception as e:
        # Deliberate best effort: failures are logged and reported as None
        log.error(f"Error fetching company tickers from [{ticker_txt_url}]: {str(e)}")
        return None

@lru_cache(maxsize=1)
Expand Down
Loading