From 52d067b955da42fd2941cd7cff21a4ba9afa4d9c Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Tue, 17 Dec 2024 16:15:19 -0600 Subject: [PATCH 1/2] IN: bills improve committee recognition around actions --- scrapers/in/bills.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scrapers/in/bills.py b/scrapers/in/bills.py index 171be95580..9e747e47fe 100644 --- a/scrapers/in/bills.py +++ b/scrapers/in/bills.py @@ -376,6 +376,7 @@ def scrape(self, session=None): self.logger.warning("Could not find bill actions page") actions = [] + committee_name_match_regex = r"committee on (.*?)( pursuant to|$)" for action in actions: action_desc = action["description"] @@ -424,8 +425,9 @@ def scrape(self, session=None): action_type.append("passage") # Identify related committee - if "committee on" in action_desc_lower: - committee = action_desc_lower.split("committee on")[-1].strip() + committee_matches = re.search(committee_name_match_regex, action_desc, re.IGNORECASE) + if committee_matches: + committee = committee_matches[1].strip() # Add action to bill action_instance = bill.add_action( From 9c04295bf064ed5ecb253363195bf4a66278109e Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Tue, 17 Dec 2024 16:16:30 -0600 Subject: [PATCH 2/2] IN: fix linting --- scrapers/in/bills.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapers/in/bills.py b/scrapers/in/bills.py index 9e747e47fe..90981669e6 100644 --- a/scrapers/in/bills.py +++ b/scrapers/in/bills.py @@ -425,7 +425,9 @@ def scrape(self, session=None): action_type.append("passage") # Identify related committee - committee_matches = re.search(committee_name_match_regex, action_desc, re.IGNORECASE) + committee_matches = re.search( + committee_name_match_regex, action_desc, re.IGNORECASE + ) if committee_matches: committee = committee_matches[1].strip()