Skip to content

Commit

Permalink
Support exact search with quotes in the notebook (#33)
Browse files Browse the repository at this point in the history
* Remove sys.exit from cli.py

* Convert notebook to handle exceptions rather than relying on sys.exit codes

* Bump version
  • Loading branch information
GalenReich authored Jul 2, 2024
1 parent 97b60fd commit b6cb1a6
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 42 deletions.
31 changes: 14 additions & 17 deletions edgar_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,23 +132,20 @@ def text_search(
destination=output,
)
scraper = EdgarTextSearcher()
try:
scraper.text_search(
keywords=keywords,
entity_id=entity_id,
filing_form=TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING.get(filing_form),
single_forms=single_forms,
start_date=start_date,
end_date=end_date,
min_wait_seconds=min_wait,
max_wait_seconds=max_wait,
retries=retries,
destination=output,
peo_in=peo_in,
inc_in=inc_in
)
except NoResultsFoundError as e:
sys.exit(2)
scraper.text_search(
keywords=keywords,
entity_id=entity_id,
filing_form=TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING.get(filing_form),
single_forms=single_forms,
start_date=start_date,
end_date=end_date,
min_wait_seconds=min_wait,
max_wait_seconds=max_wait,
retries=retries,
destination=output,
peo_in=peo_in,
inc_in=inc_in
)

@staticmethod
def rss(
Expand Down
57 changes: 33 additions & 24 deletions notebook/Bellingcat_EDGAR_Tool.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"from ipywidgets import widgets\n",
"from IPython.display import display\n",
"from IPython import get_ipython\n",
"from contextlib import redirect_stdout\n",
"\n",
"data_table.enable_dataframe_formatter()\n",
"\n",
Expand All @@ -74,33 +75,41 @@
"\n",
"# Install the EDGAR search tool on the first run\n",
"![ ! -f \"edgar_tool_installed\" ] && echo -n \"Loading the EDGAR Tool on first search...\" && pip install edgar-tool >> {logfile} 2>&1 && pip install pandas==1.5.3 >> {logfile} 2>&1 && touch edgar_tool_installed && echo \"Loaded.\"\n",
"from edgar_tool.cli import SecEdgarScraperCli as edgar_tool\n",
"from edgar_tool.page_fetcher import NoResultsFoundError\n",
"\n",
"# Run the tool with the query\n",
"!echo -n \"Searching EDGAR...\"\n",
"!edgar-tool text_search {search_keywords} --start_date {start_date} --end_date {end_date} --filing_form {filing_type} --entity_id {company_cik} --output {output} --browser firefox --min-wait 0.5 --max-wait 1.5 {loc_filter} >> {logfile} 2>&1\n",
"exit_code = get_ipython().__dict__['user_ns']['_exit_code']\n",
"!echo \"Done.\"\n",
"\n",
"# Error handling\n",
"if exit_code == 2:\n",
"print(\"Searching EDGAR...\")\n",
"try:\n",
" with open(logfile, 'a') as f:\n",
" with redirect_stdout(f):\n",
" edgar_tool.text_search(\n",
" search_keywords,\n",
" start_date=start_date, \n",
" end_date=end_date,\n",
" filing_form=filing_type,\n",
" entity_id=company_cik,\n",
" output=output,\n",
" peo_in=location if filter_by_location==\"Principal executive offices in\" else None,\n",
" inc_in=location if filter_by_location==\"Incorporated in\" else None,\n",
" )\n",
" print(\"Done.\")\n",
" # Load results\n",
" results = pd.read_csv(output)\n",
"\n",
" # Show download button\n",
" btn = widgets.Button(description='Download Results')\n",
" btn.on_click(lambda x: files.download(output))\n",
" display(btn)\n",
"\n",
" # Display the results in a data table\n",
" display(results)\n",
"except NoResultsFoundError:\n",
" print(\"\\x1b[33m No results were found for your query.\\x1b[0m\")\n",
"elif exit_code != 0:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, check your search and try again.\\x1b[0m\")\n",
"else:\n",
" try:\n",
" # Load results\n",
" results = pd.read_csv(output)\n",
"\n",
" # Show download button\n",
" btn = widgets.Button(description='Download Results')\n",
" btn.on_click(lambda x: files.download(output))\n",
" display(btn)\n",
"\n",
" # Display the results in a data table\n",
" display(results)\n",
" except FileNotFoundError as e:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, please get in touch at [email protected] and help us improve the tool for everyone. \\x1b[0m\")\n",
"\n"
"except FileNotFoundError as e:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, please get in touch at [email protected] and help us improve the tool for everyone. \\x1b[0m\")\n",
"except Exception as e:\n",
" print(\"\\x1b[31m Something went wrong with the EDGAR tool, check your search and try again.\\x1b[0m\") "
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "edgar-tool"
version = "1.3.1"
version = "1.3.2"
description = "Search and retrieve corporate and financial data from the United States Securities and Exchange Commission (SEC)."
authors = ["Bellingcat"]
license = "GNU General Public License v3 (GPLv3)"
Expand Down

0 comments on commit b6cb1a6

Please sign in to comment.