def get_voter_name_from_url_request(self, url: str) -> str:
    """Return a legislator's name scraped from their biography page.

    The page at *url* is fetched, the text of its first ``<h1>`` is read, and
    the words "Representative" / "Senator" are removed from it. The cleaned
    name is stored in ``self.members_cache`` keyed by URL, so a URL that has
    already been resolved is answered from the cache without a new request.

    Examples:
        - https://ndlegis.gov/biography/liz-conmy -> Liz Conmy
        - https://ndlegis.gov/biography/randy-a-schobinger -> Randy A. Schobinger

    Args:
        url: URL of the member's biography page.

    Returns:
        The heading text with the chamber title removed and surrounding
        whitespace stripped (an empty string when the page has no ``<h1>``
        text).
    """
    if url in self.members_cache:
        return self.members_cache[url]

    html_content = requests.get(url).content
    doc = lxml.html.fromstring(html_content)

    # string(//h1) yields the text of the first <h1>, or "" when none exists.
    fullname = doc.xpath("string(//h1)").strip()

    # Strip the title once and use that single value for both the cache and
    # the return, so the first lookup and later cached lookups agree.
    # NOTE: str.replace removes the title wherever it occurs in the heading,
    # not only as a prefix.
    voter_name = (
        fullname.replace("Representative", "").replace("Senator", "").strip()
    )
    self.members_cache[url] = voter_name

    return voter_name
.text_content() + .strip() + .split(" ")[0] + ) + no_count = ( + vote_modal.xpath( + './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]/h6' + )[0] + .text_content() + .strip() + .split(" ")[0] + ) + other_count = ( + vote_modal.xpath( + './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]/h6' + )[0] + .text_content() + .strip() + .split(" ")[0] + ) + + vote.set_count("yes", int(yes_count)) + vote.set_count("no", int(no_count)) + vote.set_count("other", int(other_count)) + for vote_link in vote_modal.xpath( + './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]//a' + ): + voter_url = vote_link.attrib["href"] + voter_name = self.get_voter_name_from_url_request(voter_url) + vote.yes(voter_name) + for vote_link in vote_modal.xpath( + './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]//a' + ): + voter_url = vote_link.attrib["href"] + voter_name = self.get_voter_name_from_url_request(voter_url) + vote.no(voter_name) + for vote_link in vote_modal.xpath( + './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]//a' + ): + voter_url = vote_link.attrib["href"] + voter_name = self.get_voter_name_from_url_request(voter_url) + vote.vote("other", voter_name) + + yield vote + class NDBillScraper(Scraper): def scrape(self, session=None):