Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
JackCollins1991 committed Aug 25, 2024
1 parent d3b836a commit 0fc3545
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 3 deletions.
138 changes: 135 additions & 3 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import pytest
import subprocess
import warnings
from datetime import datetime
from unittest.mock import patch
from edgar_tool.cli import SecEdgarScraperCli


def test_cli_should_return_help_string_when_passed_no_args():
"""Tests that running edgar-tool without any arguments returns the CLI's help string and 0 exit code."""
"""
Tests that running `edgar-tool` without any arguments returns the CLI's help string
and an exit code of 0.
"""
# GIVEN
expected = """
expected_help = """
NAME
edgar-tool
Expand All @@ -26,4 +34,128 @@ def test_cli_should_return_help_string_when_passed_no_args():

# THEN
assert result.returncode == 0
assert result.stdout.strip() == expected.strip()
assert result.stdout.strip() == expected_help.strip()


@patch('edgar_tool.text_search.EdgarTextSearcher.text_search')
def test_text_search_capture_arguments(mock_text_search):
    """
    Check which arguments `SecEdgarScraperCli.text_search` hands to the
    patched `EdgarTextSearcher.text_search`.

    `mock_text_search` is injected by the patch decorator, so the call made by
    the CLI is recorded on the mock and can be inspected afterwards.
    """
    # ARRANGE: options as a user would pass them on the command line.
    cli_options = {
        "output": "results.csv",
        "entity_id": "0001030717",
        "filing_form": "all_annual_quarterly_and_current_reports",
        "start_date": "2021-01-01",
        "end_date": "2021-12-31",
        "min_wait": 5.0,
        "max_wait": 7.0,
        "retries": 3,
        "peo_in": "NY, OH",
        "inc_in": None,
    }

    # Keyword arguments the mock is expected to have been called with.
    forwarded = {
        "keywords": ["Tsunami", "Hazards"],
        "entity_id": "0001030717",
        # Mapped with TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING
        "filing_form": "All annual, quarterly, and current reports",
        "single_forms": None,
        "start_date": datetime(2021, 1, 1),
        "end_date": datetime(2021, 12, 31),
        "min_wait_seconds": 5.0,
        "max_wait_seconds": 7.0,
        "retries": 3,
        "destination": "results.csv",
        # Whitespace stripped from the "NY, OH" input
        "peo_in": "NY,OH",
        "inc_in": None,
    }

    # ACT
    SecEdgarScraperCli.text_search("Tsunami", "Hazards", **cli_options)

    # ASSERT
    mock_text_search.assert_called_once_with(**forwarded)

@patch("edgar_tool.text_search.write_results_to_file")
def test_text_search_end_to_end(mock_write_results_to_file):
    """
    Run `SecEdgarScraperCli.text_search` end to end and inspect what would
    have been passed to `text_search.write_results_to_file`.

    The writer is patched so no file is created. Any failure (for example a
    network or server problem) is reported as a `UserWarning` rather than as
    a test failure.
    """
    # ARRANGE: mock_write_results_to_file is injected by the patch decorator.
    failure_hint = (
        "There might be an issue with accessing the SEC website or the SEC's return payload."
    )

    try:
        # ACT
        SecEdgarScraperCli.text_search(
            "John Doe",
            output="results.csv",
            start_date="2021-01-01",
            end_date="2021-01-31",
        )

        # First positional argument of the recorded writer call, materialised
        # into a list so it can be indexed.
        recorded_call = mock_write_results_to_file.call_args
        collected = list(recorded_call[0][0])

        # ASSERT: the first result carries a 'root_form' entry.
        assert 'root_form' in collected[0][0]

    except Exception as e:
        # NOTE(review): AssertionError is a subclass of Exception, so a failed
        # assertion above is also downgraded to a warning here.
        warnings.warn(
            f"An exception occurred: {str(e)}\n" + failure_hint,
            UserWarning,
        )


@patch('edgar_tool.text_search.EdgarTextSearcher.text_search')
def test_text_search_with_both_peo_in_and_inc_in(mock_text_search):
    """
    Expect `SecEdgarScraperCli.text_search` to raise when `peo_in` and
    `inc_in` are supplied together.
    """
    # ARRANGE: mock_text_search is injected by the patch decorator and is
    # configured to raise when called.
    # NOTE(review): the mock raises the very message this test expects, so the
    # test may pass without exercising the CLI's own validation - confirm
    # where the real check lives.
    expected_message = "Use only one of peo_in or inc_in, not both."
    mock_text_search.side_effect = Exception(expected_message)

    # NOTE(review): keywords are passed as one list here, while
    # test_text_search_capture_arguments passes separate strings - confirm
    # which form the CLI expects.
    search_kwargs = {
        "start_date": "2019-06-01",
        "end_date": "2024-01-01",
        "inc_in": "NY,OH",
        "peo_in": "NY,OH",
    }

    # ACT & ASSERT
    with pytest.raises(Exception, match=expected_message):
        SecEdgarScraperCli.text_search(["Tsunami", "Hazards"], **search_kwargs)

@patch('edgar_tool.rss.write_results_to_file')
def test_rss_end_to_end(mock_rss):
    """
    Run `SecEdgarScraperCli.rss` end to end and check that the patched
    `rss.write_results_to_file` was called.

    Nothing is asserted about the feed contents because they are liable to
    change. The writer is patched so no file is created. Any failure (for
    example a network or server problem) is reported as a `UserWarning`
    rather than as a test failure.
    """
    # ARRANGE: mock_rss is injected by the patch decorator.
    failure_hint = (
        "There might be an issue with accessing the RSS feed or the return payload."
    )

    try:
        # ACT: simulates `edgar-tool rss "GOOG" --output "rss_feed.csv"`
        SecEdgarScraperCli.rss("GOOG", output="rss_feed.csv")

        # ASSERT: the writer would have been invoked; because it is patched,
        # no file is actually written.
        assert mock_rss.call_args
    except Exception as e:
        # NOTE(review): AssertionError is a subclass of Exception, so a failed
        # assertion above is also downgraded to a warning here.
        warnings.warn(
            f"An exception occurred: {str(e)}\n" + failure_hint,
            UserWarning,
        )
57 changes: 57 additions & 0 deletions tests/test_text_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import urllib.parse
from datetime import date
from edgar_tool.cli import EdgarTextSearcher

def decode_url(query_string):
    """
    Normalise a URL query string so two query strings can be compared.

    The query string is parsed, the comma-separated codes of its 'forms'
    parameter are sorted, and the whole query is re-encoded. This is
    necessary because the forms can appear in any order and because raw URL
    strings cannot be compared directly due to encoding differences
    (for example ',' versus '%2C').

    Args:
        query_string (str): The query string to parse and normalise.

    Returns:
        str: The re-encoded query string. If a 'forms' parameter is present
        it is emitted once with its codes sorted; if it is absent, no
        'forms' parameter is added. All other parameters keep their values
        and relative order.
    """
    parsed_query = urllib.parse.parse_qs(query_string)

    if 'forms' in parsed_query:
        # Merge every occurrence of 'forms' so that repeated parameters are
        # not silently dropped, then sort the individual form codes.
        form_codes = [
            code
            for value in parsed_query['forms']
            for code in value.split(',')
        ]
        parsed_query['forms'] = ','.join(sorted(form_codes))

    # doseq=True expands the list values produced by parse_qs back into
    # individual key=value pairs.
    return urllib.parse.urlencode(parsed_query, doseq=True)

def test_generate_request_args():
    """
    Check that `EdgarTextSearcher._generate_request_args` builds the expected
    query string, treating the order of the 'forms' codes as irrelevant.
    """
    # ARRANGE
    request_kwargs = {
        "keywords": ['Tsunami', 'Hazards'],
        "entity_id": '0001030717',
        "filing_form": "All annual, quarterly, and current reports",
        "single_forms": ['8-K'],
        "start_date": date(2019, 6, 1),
        "end_date": date(2024, 1, 1),
        "peo_in": None,
        "inc_in": "NY,OH",
    }

    expected_query = (
        'q=Tsunami+Hazards&dateRange=custom&startdt=2019-06-01&enddt=2024-01-01'
        '&locationCodes=NY,OH&locationType=incorporated&entityName=0001030717'
        '&forms=15-12B,1-K,40-F,24F-2NT,N-30B-2,NT+10-D,ABS-15G,20-F,1-Z,15-15D'
        ',6-K,13F-NT,N-MFP1,10-QT,QRTLYRPT,11-KT,15-12G,DSTRBRPT,NSAR-B,25-NSE'
        ',ABS-EE,N-30D,N-MFP2,ANNLRPT,N-PX,25,NPORT-EX,SP+15D2,NT+20-F,1-SA'
        ',NSAR-A,1-U,13F-HR,8-K12G3,N-CSR,SD,NT+11-K,N-Q,40-17F2,8-K15D5'
        ',NT+10-K,10-KT,NSAR-U,NT+10-Q,10-D,15F-15D,10-K,N-CSRS,10-Q,18-K'
        ',IRANNOTICE,1-Z-W,15F-12G,11-K,N-CEN,15F-12B,N-MFP,8-K,40-17G'
    )

    # ACT
    actual_query = EdgarTextSearcher._generate_request_args(**request_kwargs)

    # ASSERT: both sides go through decode_url before they are compared.
    assert decode_url(actual_query) == decode_url(expected_query)

0 comments on commit 0fc3545

Please sign in to comment.