Skip to content

Commit

Permalink
Merge pull request #5059 from braykuka/1242-ND-scrape-how-legislators-voted
Browse files Browse the repository at this point in the history

ND: add votes scraping to bills scraper
  • Loading branch information
jessemortenson authored Oct 24, 2024
2 parents 93cb59a + d99e121 commit 0356e80
Showing 1 changed file with 128 additions and 1 deletion.
129 changes: 128 additions & 1 deletion scrapers/nd/bills.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
import logging
import re
from dateutil import parser
from openstates.scrape import Scraper, Bill
from openstates.scrape import Scraper, Bill, VoteEvent
import pytz
from spatula import JsonPage
from .actions import NDCategorizer
import lxml.html
import requests


class BillList(JsonPage):
categorizer = NDCategorizer()
member_name_re = re.compile(r"^(Sen\.|Rep\.)\s*(.+),\s(.+)")
comm_name_re = re.compile(r"^(House|Senate)\s*(.+)")
version_name_re = re.compile(r"introduced|engrossment|enrollment")
members_cache = {}

_tz = pytz.timezone("US/Central")

def __init__(self, input_data):
super().__init__()
Expand All @@ -30,6 +36,30 @@ def create_source_url(self):
f"{assembly_session_id}-{year}/data/bills.json"
)

def get_voter_name_from_url_request(self, url: str) -> str:
"""
Description:
Get the full name from URL Request
Example:
- https://ndlegis.gov/biography/liz-conmy -> Liz Conmy
- https://ndlegis.gov/biography/randy-a-schobinger -> Randy A. Schobinger
"""
if url in self.members_cache:
return self.members_cache[url]

html_content = requests.get(url).content
doc = lxml.html.fromstring(html_content)
doc.make_links_absolute(url)

fullname = doc.xpath("string(//h1)").strip()
self.members_cache[url] = (
fullname.replace("Representative", "").replace("Senator", "").strip()
)

return fullname

def process_page(self):
json_response = self.response.json()
bills = json_response.get("bills")
Expand Down Expand Up @@ -128,6 +158,103 @@ def process_page(self):

yield bill

            # Get bill-actions url from bill-overview url.
            # The overview URL differs from the actions URL only in the
            # short path segment ("/bo" -> "/ba") and the page name.
            action_url = (
                bill_data["url"]
                .replace("/bo", "/ba")
                .replace("bill-overview", "bill-actions")
            )

            # Fetch the bill-actions page and parse it; each recorded vote is
            # rendered as a Bootstrap modal on this page.
            html_content = requests.get(action_url).content
            doc = lxml.html.fromstring(html_content)
            doc.make_links_absolute(action_url)
            votes_list = doc.xpath(
                '//div[@aria-labelledby="vote-modal"]//div[@class="modal-content"]'
            )
            for vote_modal in votes_list:
                # Modal title holds the motion, e.g. the reading/amendment voted on.
                motion_text = (
                    vote_modal.xpath('.//h5[@class="modal-title"]')[0]
                    .text_content()
                    .strip()
                )
                # Vote date sits in a right-floated span in the modal body.
                # NOTE(review): parser.parse with no dayfirst/format — assumes
                # the site uses an unambiguous US-style date; confirm.
                date = parser.parse(
                    vote_modal.xpath(
                        './/div[@class="modal-body"]/span[@class="float-right"]'
                    )[0]
                    .text_content()
                    .strip()
                )
                # Localize naive parsed date to North Dakota time (US/Central).
                start_date = self._tz.localize(date)
                # Status line, e.g. "House Passed" / "Senate Failed"; it encodes
                # both the voting chamber and the outcome.
                status = (
                    vote_modal.xpath('.//div[@class="modal-body"]/span[@class="bold"]')[
                        0
                    ]
                    .text_content()
                    .strip()
                )
                chamber = "lower" if "house" in status.lower() else "upper"
                status = "pass" if "passed" in status.lower() else "fail"
                vote = VoteEvent(
                    chamber=chamber,
                    start_date=start_date,
                    motion_text=f"Motion for {motion_text} on {bill_id}",
                    result=status,
                    legislative_session=self.input.get("assembly_id"),
                    # TODO: get all possible classification types, replace below
                    classification="passage",
                    bill=bill_id,
                    # Bill IDs beginning with "H" originate in the House.
                    bill_chamber="lower" if bill_id[0] == "H" else "upper",
                )
                vote.add_source(action_url)
                # Tallies appear as <h6> headings like "23 Yea"; take the
                # leading number. Absentees are recorded as "other".
                yes_count = (
                    vote_modal.xpath(
                        './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]/h6'
                    )[0]
                    .text_content()
                    .strip()
                    .split(" ")[0]
                )
                no_count = (
                    vote_modal.xpath(
                        './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]/h6'
                    )[0]
                    .text_content()
                    .strip()
                    .split(" ")[0]
                )
                other_count = (
                    vote_modal.xpath(
                        './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]/h6'
                    )[0]
                    .text_content()
                    .strip()
                    .split(" ")[0]
                )

                vote.set_count("yes", int(yes_count))
                vote.set_count("no", int(no_count))
                vote.set_count("other", int(other_count))
                # Each voter is an <a> linking to their biography page; resolve
                # the link to a display name (memoized per scrape) and record
                # the individual vote under the matching tally bucket.
                for vote_link in vote_modal.xpath(
                    './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]//a'
                ):
                    voter_url = vote_link.attrib["href"]
                    voter_name = self.get_voter_name_from_url_request(voter_url)
                    vote.yes(voter_name)
                for vote_link in vote_modal.xpath(
                    './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]//a'
                ):
                    voter_url = vote_link.attrib["href"]
                    voter_name = self.get_voter_name_from_url_request(voter_url)
                    vote.no(voter_name)
                for vote_link in vote_modal.xpath(
                    './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]//a'
                ):
                    voter_url = vote_link.attrib["href"]
                    voter_name = self.get_voter_name_from_url_request(voter_url)
                    vote.vote("other", voter_name)

                yield vote


class NDBillScraper(Scraper):
def scrape(self, session=None):
Expand Down

0 comments on commit 0356e80

Please sign in to comment.