USA: avoid errors for 118-S4869

openstates · Aug 6, 2024 · 7c1dcb8
1 parent 220ba0e
commit 7c1dcb8
Showing 1 changed file with 13 additions and 2 deletions.
diff --git a/scrapers/usa/bills.py b/scrapers/usa/bills.py
@@ -104,7 +104,7 @@ def scrape(self, chamber=None, session=None, start=None):
         root = ET.fromstring(sitemaps)
 
         # if you want to test a bill:
-        # yield from self.parse_bill('https://www.govinfo.gov/bulkdata/BILLSTATUS/116/hr/BILLSTATUS-116hr3884.xml')
+        # yield from self.parse_bill('https://www.govinfo.gov/bulkdata/BILLSTATUS/118/s/BILLSTATUS-118s4869.xml')
 
         for link in root.findall("us:sitemap/us:loc", self.ns):
             # split by /, then check that "116s" matches the chamber
@@ -213,6 +213,8 @@ def classify_actor_by_code(self, action_code: str):
             return "lower"
         elif action_code[0:1] == "E":
             return "executive"
+        elif action_code[0:1] == "S":
+            return "upper"
 
         if action_code.isdigit():
             code = int(action_code)
@@ -256,13 +258,17 @@ def classify_action_by_code(self, action):
             # https://www.govinfo.gov/bulkdata/BILLSTATUS/116/hr/BILLSTATUS-116hr8337.xml
             "H37300": "passage",
             "Intro-H": "introduction",
+            # Senate counterpart of the "Intro-H" introduction code, seen in:
+            # https://www.govinfo.gov/bulkdata/BILLSTATUS/118/s/BILLSTATUS-118s4869.xml
+            "Intro-S": "introduction",
         }
 
         return codes.get(action)
 
     def classify_action_by_name(self, action):
         action_classifiers = [
             ("Read the second time", ["reading-2"]),
+            ("referred to", ["referral-committee"]),
             (
                 "Received in the Senate. Read the first time",
                 ["introduction", "reading-1"],
@@ -334,6 +340,9 @@ def scrape_actions(self, bill, xml):
                     if possible_actor is not None:
                         actor = possible_actor
 
+                if not action_text:
+                    action_text = "No action text provided by the source"
+
                 bill.add_action(
                     action_text,
                     action_date,
@@ -347,7 +356,9 @@ def scrape_hearing_by(self, bill, xml, url):
         actions = []
 
         for row in xml.findall("bill/actions/item"):
-            action_text = self.get_xpath(row, "text")
+            action_text = (
+                self.get_xpath(row, "text") if self.get_xpath(row, "text") else ""
+            )
             if "hearings held" not in action_text.lower():
                 continue
             committee_name = self.get_xpath(row, "committees/item/name")