Skip to content

Commit

Permalink
Add filing categories to url_generator
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-gillard authored Jul 28, 2024
1 parent 9b51728 commit 4063931
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 4 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ Usage with date range and export to custom CSV file
edgar-tool text_search Tsunami Hazards --start_date "2021-01-01" --end_date "2021-12-31" --output "results.csv"
```

# Usage with a partial set of filing forms + single forms
### Usage with a partial set of filing forms + single forms

```
edgar-tool text_search Hurricane Damage --filing_form "registration_statements" --single_forms "['1-K', '1-SA']"
```

Expand Down
45 changes: 43 additions & 2 deletions edgar_tool/url_generator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,29 @@
import datetime
import enum
from typing import Literal, TypedDict
from urllib import parse


# Lookup table from the human-readable filing category name to the
# "form-catN" identifier used for that category. The catch-all "all"
# category maps to an empty string rather than to a numbered identifier.
filing_category_to_sec_form_id = {
    "all": "",
    "all_except_section_16": "form-cat0",
    "all_annual_quarterly_and_current_reports": "form-cat1",
    "all_section_16": "form-cat2",
    "beneficial_ownership_reports": "form-cat3",
    "exempt_offerings": "form-cat4",
    "registration_statements": "form-cat5",
    "filing_review_correspondence": "form-cat6",
    "sec_orders_and_notices": "form-cat7",
    "proxy_materials": "form-cat8",
    "tender_offers_and_going_private_tx": "form-cat9",
    "trust_indentures": "form-cat10",
}


class SearchQueryKwargs(TypedDict, total=False):
keywords: list[str]
entity: str
filing_form: str
filing_category: str
single_forms: list[str]
date_range_select: Literal["all", "10y", "1y", "30d", "custom"]
start_date: datetime.date
Expand Down Expand Up @@ -50,7 +67,7 @@ def __init__(self, **query_args: SearchQueryKwargs):

self._keywords = keywords
self.entity = entity
self.filing_form = query_args.get("filing_form")
self._filing_category = query_args.get("filing_category", "all")
self.single_forms = query_args.get("single_forms")
self.date_range_select = date_range_select
self.start_date = start_date
Expand All @@ -67,6 +84,28 @@ def keywords(self):
"""Returns the keywords to search for, wrapping exact phrases in quotes."""
return [f'"{phrase}"' if " " in phrase else phrase for phrase in self._keywords]

@property
def filing_category(self):
    """Return the SEC form-category id for the selected filing category.

    Looks up ``self._filing_category`` in the table below and returns the
    matching ``form-catN`` identifier. The ``"all"`` category maps to an
    empty string.

    Raises:
        KeyError: If ``self._filing_category`` is not one of the category
            names listed in the table.
    """
    # NOTE(review): this table repeats the module-level
    # ``filing_category_to_sec_form_id`` mapping; consider reusing that
    # single definition so the two cannot drift apart.
    sec_form_id_by_category = {
        "all": "",
        "all_except_section_16": "form-cat0",
        "all_annual_quarterly_and_current_reports": "form-cat1",
        "all_section_16": "form-cat2",
        "beneficial_ownership_reports": "form-cat3",
        "exempt_offerings": "form-cat4",
        "registration_statements": "form-cat5",
        "filing_review_correspondence": "form-cat6",
        "sec_orders_and_notices": "form-cat7",
        "proxy_materials": "form-cat8",
        "tender_offers_and_going_private_tx": "form-cat9",
        "trust_indentures": "form-cat10",
    }
    return sec_form_id_by_category[self._filing_category]


def generate_search_url_for_kwargs(search_kwargs: SearchQueryKwargs) -> str:
base_url = "https://www.sec.gov/edgar/search/#/"
Expand All @@ -87,6 +126,8 @@ def generate_search_url_for_kwargs(search_kwargs: SearchQueryKwargs) -> str:
"enddt": validated_params.end_date.strftime("%Y-%m-%d"),
}
)
if validated_params.filing_category:
query_params["category"] = validated_params.filing_category
encoded_params = parse.urlencode(
query_params, doseq=True, encoding="utf-8", quote_via=parse.quote
)
Expand Down
33 changes: 32 additions & 1 deletion tests/test_url_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ def test_should_raise_if_date_range_select_invalid():
({"date_range_select": "30d"}, "&dateRange=30d"),
],
)
def test_generates_correct_url_for_date(date_kwargs, url_ending):
def test_generates_correct_url_for_date_ranges(date_kwargs, url_ending):
"""Tests that various date range options are correctly translated
into the search URL."""
# GIVEN
expected_url = (
f"https://www.sec.gov/edgar/search/#/q=%22Ford%20Motor%20Co%22{url_ending}"
Expand All @@ -130,3 +132,32 @@ def test_generates_correct_url_for_date(date_kwargs, url_ending):

# THEN
assert actual_url == expected_url


@pytest.mark.parametrize(
    "filing_category, url_ending",
    [
        ("all", ""),
        ("all_except_section_16", "&category=form-cat0"),
        ("all_annual_quarterly_and_current_reports", "&category=form-cat1"),
        ("all_section_16", "&category=form-cat2"),
        ("beneficial_ownership_reports", "&category=form-cat3"),
        ("exempt_offerings", "&category=form-cat4"),
        ("registration_statements", "&category=form-cat5"),
        ("filing_review_correspondence", "&category=form-cat6"),
        ("sec_orders_and_notices", "&category=form-cat7"),
        ("proxy_materials", "&category=form-cat8"),
        ("tender_offers_and_going_private_tx", "&category=form-cat9"),
        ("trust_indentures", "&category=form-cat10"),
    ],
)
def test_generates_correct_url_for_filing_category(filing_category, url_ending):
    """Checks the ``category`` URL parameter produced for each filing category.

    Every named category is expected to append ``&category=form-catN`` to the
    search URL, while ``"all"`` is expected to append nothing.
    """
    # GIVEN
    search_kwargs = {"keywords": ["Ignore"], "filing_category": filing_category}
    url_we_expect = f"https://www.sec.gov/edgar/search/#/q=Ignore{url_ending}"

    # WHEN
    generated_url = url_generator.generate_search_url_for_kwargs(search_kwargs)

    # THEN
    assert generated_url == url_we_expect

0 comments on commit 4063931

Please sign in to comment.