Skip to content

Commit

Permalink
USA: avoid errors for 118-S4869
Browse files (browse the repository at this point in the history)
  • Loading branch information
NewAgeAirbender committed Aug 6, 2024
1 parent 220ba0e commit 7c1dcb8
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions scrapers/usa/bills.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -104,7 +104,7 @@ def scrape(self, chamber=None, session=None, start=None):
root = ET.fromstring(sitemaps)

# if you want to test a bill:
# yield from self.parse_bill('https://www.govinfo.gov/bulkdata/BILLSTATUS/116/hr/BILLSTATUS-116hr3884.xml')
# yield from self.parse_bill('https://www.govinfo.gov/bulkdata/BILLSTATUS/118/s/BILLSTATUS-118s4869.xml')

for link in root.findall("us:sitemap/us:loc", self.ns):
# split by /, then check that "116s" matches the chamber
Expand Down Expand Up @@ -213,6 +213,8 @@ def classify_actor_by_code(self, action_code: str):
return "lower"
elif action_code[0:1] == "E":
return "executive"
elif action_code[0:1] == "S":
return "upper"

if action_code.isdigit():
code = int(action_code)
Expand Down Expand Up @@ -256,13 +258,17 @@ def classify_action_by_code(self, action):
# https://www.govinfo.gov/bulkdata/BILLSTATUS/116/hr/BILLSTATUS-116hr8337.xml
"H37300": "passage",
"Intro-H": "introduction",
# new one for senate
# https://www.govinfo.gov/bulkdata/BILLSTATUS/118/s/BILLSTATUS-118s4869.xml
"Intro-S": "introduction",
}

return codes.get(action)

def classify_action_by_name(self, action):
action_classifiers = [
("Read the second time", ["reading-2"]),
("referred to", ["referral-committee"]),
(
"Received in the Senate. Read the first time",
["introduction", "reading-1"],
Expand Down Expand Up @@ -334,6 +340,9 @@ def scrape_actions(self, bill, xml):
if possible_actor is not None:
actor = possible_actor

if not action_text:
action_text = "No action text provided by the source"

bill.add_action(
action_text,
action_date,
Expand All @@ -347,7 +356,9 @@ def scrape_hearing_by(self, bill, xml, url):
actions = []

for row in xml.findall("bill/actions/item"):
action_text = self.get_xpath(row, "text")
action_text = (
self.get_xpath(row, "text") if self.get_xpath(row, "text") else ""
)
if "hearings held" not in action_text.lower():
continue
committee_name = self.get_xpath(row, "committees/item/name")
Expand Down

0 comments on commit 7c1dcb8

Please sign in to comment.