Skip to content

Commit

Permalink
Replace a couple more problematic links
Browse files Browse the repository at this point in the history
  • Loading branch information
pcraig3 committed Nov 22, 2024
1 parent d6f41f1 commit a3427d0
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve
### Fixed

- Replace "www.grants.gov/web/grants/search-grants.html" with "grants.gov/search-grants"
- Replace "www.grants.gov/web/grants/forms/sf-424-family.html" with "grants.gov/forms/forms-repository/sf-424-family"
- Replace "www.cdc.gov/grants/dictionary/index.html" with "www.cdc.gov/grants/dictionary-of-terms/"

## [1.37.0] - 2023-11-21

Expand Down
20 changes: 19 additions & 1 deletion bloom_nofos/nofos/nofo.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,29 @@ def replace_chars(file_content):
("\u00A8", "\u25FB"),
# from () U+007F DELETE to (◻) U+25FB WHITE MEDIUM SQUARE
("\u007F", "\u25FB"),
# replace this "page not found" url with the new one
]

for _from, _to in replacement_chars:
file_content = file_content.replace(_from, _to)

return file_content


def replace_links(file_content):
# grants.gov/web links are broken and don't redirect _and_ say they are 200 🙄
replacement_chars = [
(
"www.grants.gov/web/grants/search-grants.html",
"grants.gov/search-grants",
),
(
"www.grants.gov/web/grants/forms/sf-424-family.html",
"grants.gov/forms/forms-repository/sf-424-family",
),
(
"www.cdc.gov/grants/dictionary/index.html",
"www.cdc.gov/grants/dictionary-of-terms/",
),
]

for _from, _to in replacement_chars:
Expand Down
5 changes: 4 additions & 1 deletion bloom_nofos/nofos/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
preserve_table_heading_links,
remove_google_tracking_info_from_links,
replace_chars,
replace_links,
replace_src_for_inline_images,
suggest_all_nofo_fields,
suggest_nofo_title,
Expand Down Expand Up @@ -297,8 +298,10 @@ def nofo_import(request, pk=None):
)
return redirect(view_path, **kwargs)

# replace problematic characters on import
# replace problematic characters/links on import
# Chain the cleaners: replace_links must receive the output of replace_chars,
# otherwise the character replacements are overwritten and lost.
cleaned_content = replace_chars(file_content)
cleaned_content = replace_links(cleaned_content)

soup = BeautifulSoup(cleaned_content, "html.parser") # Parse the cleaned HTML
soup = add_body_if_no_body(soup)

Expand Down

0 comments on commit a3427d0

Please sign in to comment.