From 406393127da5081c7116a43e824a68bef87bf661 Mon Sep 17 00:00:00 2001 From: Jordan Gillard Date: Sun, 28 Jul 2024 18:03:19 +0000 Subject: [PATCH] Add filing categories to url_generator --- README.md | 4 +++- edgar_tool/url_generator.py | 45 +++++++++++++++++++++++++++++++++++-- tests/test_url_generator.py | 33 ++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7d6278f..426536c 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,9 @@ Usage with date range and export to custom CSV file edgar-tool text_search Tsunami Hazards --start_date "2021-01-01" --end_date "2021-12-31" --output "results.csv" ``` -# Usage with a partial set of filing forms + single forms +### Usage with a partial set of filing forms + single forms + +``` edgar-tool text_search Hurricane Damage --filing_form "registration_statements" --single_forms "['1-K', '1-SA']" ``` diff --git a/edgar_tool/url_generator.py b/edgar_tool/url_generator.py index 8693b8f..a3c041b 100644 --- a/edgar_tool/url_generator.py +++ b/edgar_tool/url_generator.py @@ -1,12 +1,29 @@ import datetime +import enum from typing import Literal, TypedDict from urllib import parse +filing_category_to_sec_form_id = { + "all": "", + "all_except_section_16": "form-cat0", + "all_annual_quarterly_and_current_reports": "form-cat1", + "all_section_16": "form-cat2", + "beneficial_ownership_reports": "form-cat3", + "exempt_offerings": "form-cat4", + "registration_statements": "form-cat5", + "filing_review_correspondence": "form-cat6", + "sec_orders_and_notices": "form-cat7", + "proxy_materials": "form-cat8", + "tender_offers_and_going_private_tx": "form-cat9", + "trust_indentures": "form-cat10", +} + + class SearchQueryKwargs(TypedDict, total=False): keywords: list[str] entity: str - filing_form: str + filing_category: str single_forms: list[str] date_range_select: Literal["all", "10y", "1y", "30d", "custom"] start_date: datetime.date @@ -50,7 +67,7 @@ def __init__(self, 
**query_args: SearchQueryKwargs): self._keywords = keywords self.entity = entity - self.filing_form = query_args.get("filing_form") + self._filing_category = query_args.get("filing_category", "all") self.single_forms = query_args.get("single_forms") self.date_range_select = date_range_select self.start_date = start_date @@ -67,6 +84,28 @@ def keywords(self): """Returns the keywords to search for, wrapping exact phrases in quotes.""" return [f'"{phrase}"' if " " in phrase else phrase for phrase in self._keywords] + @property + def filing_category(self): + return self._filing_category + + @keywords.getter + def filing_category(self): + filing_category_to_sec_form_id = { + "all": "", + "all_except_section_16": "form-cat0", + "all_annual_quarterly_and_current_reports": "form-cat1", + "all_section_16": "form-cat2", + "beneficial_ownership_reports": "form-cat3", + "exempt_offerings": "form-cat4", + "registration_statements": "form-cat5", + "filing_review_correspondence": "form-cat6", + "sec_orders_and_notices": "form-cat7", + "proxy_materials": "form-cat8", + "tender_offers_and_going_private_tx": "form-cat9", + "trust_indentures": "form-cat10", + } + return filing_category_to_sec_form_id[self._filing_category] + def generate_search_url_for_kwargs(search_kwargs: SearchQueryKwargs) -> str: base_url = "https://www.sec.gov/edgar/search/#/" @@ -87,6 +126,8 @@ def generate_search_url_for_kwargs(search_kwargs: SearchQueryKwargs) -> str: "enddt": validated_params.end_date.strftime("%Y-%m-%d"), } ) + if validated_params.filing_category: + query_params["category"] = validated_params.filing_category encoded_params = parse.urlencode( query_params, doseq=True, encoding="utf-8", quote_via=parse.quote ) diff --git a/tests/test_url_generator.py b/tests/test_url_generator.py index ca6045a..35921ed 100644 --- a/tests/test_url_generator.py +++ b/tests/test_url_generator.py @@ -118,7 +118,9 @@ def test_should_raise_if_date_range_select_invalid(): ({"date_range_select": "30d"}, 
"&dateRange=30d"), ], ) -def test_generates_correct_url_for_date(date_kwargs, url_ending): +def test_generates_correct_url_for_date_ranges(date_kwargs, url_ending): + """Tests that various date range options are correctly translated + into the search URL.""" # GIVEN expected_url = ( f"https://www.sec.gov/edgar/search/#/q=%22Ford%20Motor%20Co%22{url_ending}" @@ -130,3 +132,32 @@ def test_generates_correct_url_for_date(date_kwargs, url_ending): # THEN assert actual_url == expected_url + + +@pytest.mark.parametrize( + "filing_category, url_ending", + ( + ("all", ""), + ("all_except_section_16", "&category=form-cat0"), + ("all_annual_quarterly_and_current_reports", "&category=form-cat1"), + ("all_section_16", "&category=form-cat2"), + ("beneficial_ownership_reports", "&category=form-cat3"), + ("exempt_offerings", "&category=form-cat4"), + ("registration_statements", "&category=form-cat5"), + ("filing_review_correspondence", "&category=form-cat6"), + ("sec_orders_and_notices", "&category=form-cat7"), + ("proxy_materials", "&category=form-cat8"), + ("tender_offers_and_going_private_tx", "&category=form-cat9"), + ("trust_indentures", "&category=form-cat10"), + ), +) +def test_generates_correct_url_for_filing_category(filing_category, url_ending): + # GIVEN + expected_url = f"https://www.sec.gov/edgar/search/#/q=Ignore{url_ending}" + test_kwargs = {"keywords": ["Ignore"], "filing_category": filing_category} + + # WHEN + actual_url = url_generator.generate_search_url_for_kwargs(test_kwargs) + + # THEN + assert actual_url == expected_url