From 7c1dcb892eaee85cadbaa2d0c24e5733dcd20eb7 Mon Sep 17 00:00:00 2001 From: NewAgeAirbender <34139325+NewAgeAirbender@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:45:34 -0500 Subject: [PATCH] USA: avoid errors for 118-S4869 --- scrapers/usa/bills.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/scrapers/usa/bills.py b/scrapers/usa/bills.py index f28d6d27c8..a8f716c95a 100644 --- a/scrapers/usa/bills.py +++ b/scrapers/usa/bills.py @@ -104,7 +104,7 @@ def scrape(self, chamber=None, session=None, start=None): root = ET.fromstring(sitemaps) # if you want to test a bill: - # yield from self.parse_bill('https://www.govinfo.gov/bulkdata/BILLSTATUS/116/hr/BILLSTATUS-116hr3884.xml') + # yield from self.parse_bill('https://www.govinfo.gov/bulkdata/BILLSTATUS/118/s/BILLSTATUS-118s4869.xml') for link in root.findall("us:sitemap/us:loc", self.ns): # split by /, then check that "116s" matches the chamber @@ -213,6 +213,8 @@ def classify_actor_by_code(self, action_code: str): return "lower" elif action_code[0:1] == "E": return "executive" + elif action_code[0:1] == "S": + return "upper" if action_code.isdigit(): code = int(action_code) @@ -256,6 +258,9 @@ def classify_action_by_code(self, action): # https://www.govinfo.gov/bulkdata/BILLSTATUS/116/hr/BILLSTATUS-116hr8337.xml "H37300": "passage", "Intro-H": "introduction", + # new one for senate + # https://www.govinfo.gov/bulkdata/BILLSTATUS/118/s/BILLSTATUS-118s4869.xml + "Intro-S": "introduction", } return codes.get(action) @@ -263,6 +268,7 @@ def classify_action_by_code(self, action): def classify_action_by_name(self, action): action_classifiers = [ ("Read the second time", ["reading-2"]), + ("referred to", ["referral-committee"]), ( "Received in the Senate. Read the first time", ["introduction", "reading-1"], @@ -334,6 +340,9 @@ def scrape_actions(self, bill, xml): if possible_actor is not None: actor = possible_actor + if not action_text: + action_text = "No action text provided by the source" + bill.add_action( action_text, action_date, @@ -347,7 +356,9 @@ def scrape_hearing_by(self, bill, xml, url): actions = [] for row in xml.findall("bill/actions/item"): - action_text = self.get_xpath(row, "text") + action_text = ( + self.get_xpath(row, "text") if self.get_xpath(row, "text") else "" + ) if "hearings held" not in action_text.lower(): continue committee_name = self.get_xpath(row, "committees/item/name")