From da0ceee1d6222315949274af9db3782366203c2d Mon Sep 17 00:00:00 2001
From: braykuka <braykuka@gmail.com>
Date: Mon, 21 Oct 2024 08:21:31 +0200
Subject: [PATCH 1/5] ND: add votes scraping to bills scraper

---
 scrapers/nd/bills.py | 100 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 99 insertions(+), 1 deletion(-)

diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py
index 0a9bb7958d..d46c39695e 100644
--- a/scrapers/nd/bills.py
+++ b/scrapers/nd/bills.py
@@ -1,9 +1,12 @@
 import logging
 import re
 from dateutil import parser
-from openstates.scrape import Scraper, Bill
+from openstates.scrape import Scraper, Bill, VoteEvent
+import pytz
 from spatula import JsonPage
 from .actions import NDCategorizer
+import lxml.html
+import requests
 
 
 class BillList(JsonPage):
@@ -11,6 +14,7 @@ class BillList(JsonPage):
     member_name_re = re.compile(r"^(Sen\.|Rep\.)\s*(.+),\s(.+)")
     comm_name_re = re.compile(r"^(House|Senate)\s*(.+)")
     version_name_re = re.compile(r"introduced|engrossment|enrollment")
+    _tz = pytz.timezone("US/Central")
 
     def __init__(self, input_data):
         super().__init__()
@@ -128,6 +132,100 @@ def process_page(self):
 
             yield bill
 
+            # Get bill-actions url from bill-overview url
+            action_url = (
+                bill_data["url"]
+                .replace("/bo", "/ba")
+                .replace("bill-overview", "bill-actions")
+            )
+
+            html_content = requests.get(action_url).content
+            doc = lxml.html.fromstring(html_content)
+            doc.make_links_absolute(action_url)
+            votes_list = doc.xpath(
+                '//div[@aria-labelledby="vote-modal"]//div[@class="modal-content"]'
+            )
+            for vote_modal in votes_list:
+                motion_text = (
+                    vote_modal.xpath('.//h5[@class="modal-title"]')[0]
+                    .text_content()
+                    .strip()
+                )
+                date = parser.parse(
+                    vote_modal.xpath(
+                        './/div[@class="modal-body"]/span[@class="float-right"]'
+                    )[0]
+                    .text_content()
+                    .strip()
+                )
+                start_date = self._tz.localize(date)
+                status = (
+                    vote_modal.xpath('.//div[@class="modal-body"]/span[@class="bold"]')[
+                        0
+                    ]
+                    .text_content()
+                    .strip()
+                )
+                chamber = "lower" if "house" in status.lower() else "upper"
+                status = "pass" if "passed" in status.lower() else "fail"
+                vote = VoteEvent(
+                    chamber=chamber,
+                    start_date=start_date,
+                    motion_text=f"Motion for {motion_text} on {bill_id}",
+                    result=status,
+                    legislative_session=self.input.get("assembly_id"),
+                    # TODO: get all possible classification types, replace below
+                    classification="passage",
+                    bill=bill_id,
+                    bill_chamber="lower" if bill_id[0] == "H" else "upper",
+                )
+                vote.add_source(action_url)
+                yes_count = (
+                    vote_modal.xpath(
+                        './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]/h6'
+                    )[0]
+                    .text_content()
+                    .strip()
+                    .split(" ")[0]
+                )
+                no_count = (
+                    vote_modal.xpath(
+                        './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]/h6'
+                    )[0]
+                    .text_content()
+                    .strip()
+                    .split(" ")[0]
+                )
+                other_count = (
+                    vote_modal.xpath(
+                        './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]/h6'
+                    )[0]
+                    .text_content()
+                    .strip()
+                    .split(" ")[0]
+                )
+
+                vote.set_count("yes", int(yes_count))
+                vote.set_count("no", int(no_count))
+                vote.set_count("other", int(other_count))
+                for vote_div in vote_modal.xpath(
+                    './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]//a'
+                ):
+                    voter_name = vote_div.text_content().strip()
+                    vote.yes(voter_name)
+                for vote_div in vote_modal.xpath(
+                    './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]//a'
+                ):
+                    voter_name = vote_div.text_content().strip()
+                    vote.no(voter_name)
+                for vote_div in vote_modal.xpath(
+                    './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]//a'
+                ):
+                    voter_name = vote_div.text_content().strip()
+                    vote.vote("other", voter_name)
+
+                yield vote
+
 
 class NDBillScraper(Scraper):
     def scrape(self, session=None):

From f1841ae83c6baa2fc65694b9005663770a3ac3fe Mon Sep 17 00:00:00 2001
From: braykuka <braykuka@gmail.com>
Date: Wed, 23 Oct 2024 23:20:30 +0200
Subject: [PATCH 2/5] Fix: updates the voter name to the full name

---
 scrapers/nd/bills.py | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py
index d46c39695e..13bed5e8d4 100644
--- a/scrapers/nd/bills.py
+++ b/scrapers/nd/bills.py
@@ -34,6 +34,22 @@ def create_source_url(self):
             f"{assembly_session_id}-{year}/data/bills.json"
         )
 
+    def get_voter_name_from_url(self, url):
+        name_uri = (
+            url.replace("https://ndlegis.gov/biography/", "")
+            .split("?")[0]
+            .split("/")[0]
+            .strip()
+        )
+
+        name_words = []
+        for w in name_uri.split("-"):
+            if len(w) == 1:
+                name_words.append(f"{w}.".title())
+            else:
+                name_words.append(w.title())
+        return " ".join(name_words)
+
     def process_page(self):
         json_response = self.response.json()
         bills = json_response.get("bills")
@@ -208,20 +224,23 @@ def process_page(self):
                 vote.set_count("yes", int(yes_count))
                 vote.set_count("no", int(no_count))
                 vote.set_count("other", int(other_count))
-                for vote_div in vote_modal.xpath(
+                for vote_link in vote_modal.xpath(
                     './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]//a'
                 ):
-                    voter_name = vote_div.text_content().strip()
+                    voter_url = vote_link.attrib["href"]
+                    voter_name = self.get_voter_name_from_url(voter_url)
                     vote.yes(voter_name)
-                for vote_div in vote_modal.xpath(
+                for vote_link in vote_modal.xpath(
                     './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]//a'
                 ):
-                    voter_name = vote_div.text_content().strip()
+                    voter_url = vote_link.attrib["href"]
+                    voter_name = self.get_voter_name_from_url(voter_url)
                     vote.no(voter_name)
-                for vote_div in vote_modal.xpath(
+                for vote_link in vote_modal.xpath(
                     './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]//a'
                 ):
-                    voter_name = vote_div.text_content().strip()
+                    voter_url = vote_link.attrib["href"]
+                    voter_name = self.get_voter_name_from_url(voter_url)
                     vote.vote("other", voter_name)
 
                 yield vote

From 61c10a13ce406d35587f15de47b238b4efe004c8 Mon Sep 17 00:00:00 2001
From: braykuka <braykuka@gmail.com>
Date: Wed, 23 Oct 2024 23:30:04 +0200
Subject: [PATCH 3/5] add docs

---
 scrapers/nd/bills.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py
index 13bed5e8d4..89ed15ffad 100644
--- a/scrapers/nd/bills.py
+++ b/scrapers/nd/bills.py
@@ -34,7 +34,16 @@ def create_source_url(self):
             f"{assembly_session_id}-{year}/data/bills.json"
         )
 
-    def get_voter_name_from_url(self, url):
+    def get_voter_name_from_url(self, url: str) -> str:
+        """
+        Description:
+            Get the full name from URL
+
+        Example:
+            - https://ndlegis.gov/biography/liz-conmy -> Liz Conmy
+            - https://ndlegis.gov/biography/randy-a-schobinger -> Randy A. Schobinger
+
+        """
         name_uri = (
             url.replace("https://ndlegis.gov/biography/", "")
             .split("?")[0]
@@ -48,6 +57,7 @@ def get_voter_name_from_url(self, url):
                 name_words.append(f"{w}.".title())
             else:
                 name_words.append(w.title())
+
         return " ".join(name_words)
 
     def process_page(self):

From 7ed34ecd8a092a7284b5092a8c75738eeb046bc2 Mon Sep 17 00:00:00 2001
From: braykuka <braykuka@gmail.com>
Date: Thu, 24 Oct 2024 14:17:27 +0200
Subject: [PATCH 4/5] update a function to get the voter name

---
 scrapers/nd/bills.py | 44 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py
index 89ed15ffad..e1737fda74 100644
--- a/scrapers/nd/bills.py
+++ b/scrapers/nd/bills.py
@@ -14,6 +14,8 @@ class BillList(JsonPage):
     member_name_re = re.compile(r"^(Sen\.|Rep\.)\s*(.+),\s(.+)")
     comm_name_re = re.compile(r"^(House|Senate)\s*(.+)")
     version_name_re = re.compile(r"introduced|engrossment|enrollment")
+    members_cache = {}
+
     _tz = pytz.timezone("US/Central")
 
     def __init__(self, input_data):
@@ -50,15 +52,37 @@ def get_voter_name_from_url(self, url: str) -> str:
             .split("/")[0]
             .strip()
         )
+        name_words = [w.title() for w in name_uri.split("-")]
+        if len(name_words) == 3 and len(name_words[1]) == 1:
+            return "{0} {1}. {2}".format(*name_words)
+        elif len(name_words) == 3 and len(name_words[1]) > 1:
+            return "{0} {1}-{2}".format(*name_words)
+        else:
+            return " ".join(name_words)
+
+    def get_voter_name_from_url_request(self, url: str) -> str:
+        """
+        Description:
+            Get the member's name, without title, from the biography page URL
+
+        Example:
+            - https://ndlegis.gov/biography/liz-conmy -> Liz Conmy
+            - https://ndlegis.gov/biography/randy-a-schobinger -> Randy A. Schobinger
+
+        """
+        if url in self.members_cache:
+            return self.members_cache[url]
+
+        html_content = requests.get(url).content
+        doc = lxml.html.fromstring(html_content)
+        doc.make_links_absolute(url)
 
-        name_words = []
-        for w in name_uri.split("-"):
-            if len(w) == 1:
-                name_words.append(f"{w}.".title())
-            else:
-                name_words.append(w.title())
+        fullname = doc.xpath("string(//h1)").strip()
+        fullname = fullname.replace("Representative", "").replace("Senator", "")
+        # Cache the title-stripped name so first and cached calls agree
+        self.members_cache[url] = fullname = fullname.strip()
 
-        return " ".join(name_words)
+        return fullname
 
     def process_page(self):
         json_response = self.response.json()
@@ -238,19 +262,19 @@ def process_page(self):
                     './/div[@class="modal-body"]/div[./h6[contains(., "Yea")]]//a'
                 ):
                     voter_url = vote_link.attrib["href"]
-                    voter_name = self.get_voter_name_from_url(voter_url)
+                    voter_name = self.get_voter_name_from_url_request(voter_url)
                     vote.yes(voter_name)
                 for vote_link in vote_modal.xpath(
                     './/div[@class="modal-body"]/div[./h6[contains(., "Nay")]]//a'
                 ):
                     voter_url = vote_link.attrib["href"]
-                    voter_name = self.get_voter_name_from_url(voter_url)
+                    voter_name = self.get_voter_name_from_url_request(voter_url)
                     vote.no(voter_name)
                 for vote_link in vote_modal.xpath(
                     './/div[@class="modal-body"]/div[./h6[contains(., "Absent")]]//a'
                 ):
                     voter_url = vote_link.attrib["href"]
-                    voter_name = self.get_voter_name_from_url(voter_url)
+                    voter_name = self.get_voter_name_from_url_request(voter_url)
                     vote.vote("other", voter_name)
 
                 yield vote

From d99e12124282e9d2f9c7fcce338307c2dde623a6 Mon Sep 17 00:00:00 2001
From: Jesse Mortenson <jessemortenson@gmail.com>
Date: Thu, 24 Oct 2024 11:39:22 -0600
Subject: [PATCH 5/5] ND: remove unused voter name identification method

---
 scrapers/nd/bills.py | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py
index e1737fda74..86efc0c6b8 100644
--- a/scrapers/nd/bills.py
+++ b/scrapers/nd/bills.py
@@ -36,30 +36,6 @@ def create_source_url(self):
             f"{assembly_session_id}-{year}/data/bills.json"
         )
 
-    def get_voter_name_from_url(self, url: str) -> str:
-        """
-        Description:
-            Get the full name from URL
-
-        Example:
-            - https://ndlegis.gov/biography/liz-conmy -> Liz Conmy
-            - https://ndlegis.gov/biography/randy-a-schobinger -> Randy A. Schobinger
-
-        """
-        name_uri = (
-            url.replace("https://ndlegis.gov/biography/", "")
-            .split("?")[0]
-            .split("/")[0]
-            .strip()
-        )
-        name_words = [w.title() for w in name_uri.split("-")]
-        if len(name_words) == 3 and len(name_words[1]) == 1:
-            return "{0} {1}. {2}".format(*name_words)
-        elif len(name_words) == 3 and len(name_words[1]) > 1:
-            return "{0} {1}-{2}".format(*name_words)
-        else:
-            return " ".join(name_words)
-
     def get_voter_name_from_url_request(self, url: str) -> str:
         """
         Description: