Merge pull request #5117 from openstates/hi-fix-action-parsing
HI: fix bill action parsing
jessemortenson authored Dec 5, 2024
2 parents 5adfabf + 5f74e45 commit e17d753
Showing 1 changed file with 17 additions and 5 deletions.
22 changes: 17 additions & 5 deletions scrapers/hi/bills.py
@@ -10,6 +10,9 @@
 
 HI_URL_BASE = "https://www.capitol.hawaii.gov"
 SHORT_CODES = f"{HI_URL_BASE}/legislature/committees.aspx?chamber=all"
+# Set this flag to true to run scrape for just one bill
+TEST_SINGLE_BILL = False
+TEST_SINGLE_BILL_NUMBER = "572"  # set to bill num you want to test
 repeated_action = ["Excused: none", "Representative(s) Eli"]
 
 
@@ -80,11 +83,16 @@ def parse_bill_actions_table(
         # vote types that have been reconsidered since last vote of that type
         reconsiderations = set()
 
-        for index, action in enumerate(action_table.xpath("*")[1:]):
-            date = action[0].text_content()
+        for index, action_row in enumerate(action_table.cssselect("tr")[1:]):
+            cells = action_row.cssselect("td")
+            date_cell = cells[0]
+            actor_cell = cells[1]
+            desc_cell = cells[2]
+
+            date = date_cell.text_content()
             date = dt.datetime.strptime(date, "%m/%d/%Y").strftime("%Y-%m-%d")
-            actor_code = action[1].text_content().upper()
-            string = action[2].text_content()
+            actor_code = actor_cell.text_content().upper()
+            string = desc_cell.text_content()
             actor = self._vote_type_map[actor_code]
             committees = find_committee(string)
 
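For reference, here is a minimal, self-contained sketch of the row-and-cell parsing pattern the new loop relies on. The HTML fragment is made up for illustration (the real status table is fetched from capitol.hawaii.gov), and it assumes the lxml and cssselect packages are installed.

import datetime as dt
import lxml.html

# Stand-in for the bill status table returned by the Hawaii legislature site.
ACTIONS_HTML = """
<table>
  <tr><th>Date</th><th>Chamber</th><th>Status Text</th></tr>
  <tr><td>12/05/2024</td><td>H</td><td>Passed Second Reading.</td></tr>
</table>
"""

action_table = lxml.html.fromstring(ACTIONS_HTML)

# cssselect("tr") matches row elements wherever they sit inside the table
# (including an implicit tbody), so skipping the header with [1:] and then
# indexing the <td> cells is less fragile than positional access to the
# table element's direct children.
for action_row in action_table.cssselect("tr")[1:]:
    cells = action_row.cssselect("td")
    date = dt.datetime.strptime(cells[0].text_content(), "%m/%d/%Y").strftime("%Y-%m-%d")
    actor_code = cells[1].text_content().upper()
    description = cells[2].text_content()
    print(date, actor_code, description)  # 2024-12-05 H Passed Second Reading.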
@@ -416,7 +424,11 @@ def scrape_type(self, chamber, session, billtype):
             bill_url = bill_url.attrib["href"].replace("www.", "")
             if not bill_url.startswith("http"):
                 bill_url = f"{HI_URL_BASE}{bill_url}"
-            yield from self.scrape_bill(session, chamber, billtype_map, bill_url)
+            if (
+                TEST_SINGLE_BILL is False
+                or f"billnumber={TEST_SINGLE_BILL_NUMBER}" in bill_url
+            ):
+                yield from self.scrape_bill(session, chamber, billtype_map, bill_url)
 
     def scrape(self, chamber=None, session=None, scrape_since=None):
         get_short_codes(self)
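As a usage note, here is a hedged sketch of how the new guard behaves when the single-bill test flag is enabled; the URL shape and values below are illustrative only, not taken from the scraper.

TEST_SINGLE_BILL = True
TEST_SINGLE_BILL_NUMBER = "572"

# Hypothetical measure URLs of the kind scrape_type iterates over.
urls = [
    "https://capitol.hawaii.gov/session/measure_indiv.aspx?billtype=HB&billnumber=572",
    "https://capitol.hawaii.gov/session/measure_indiv.aspx?billtype=HB&billnumber=300",
]

for bill_url in urls:
    # Mirrors the condition added in the diff: scrape everything when the
    # flag is off, otherwise only the bill whose number appears in the URL.
    if TEST_SINGLE_BILL is False or f"billnumber={TEST_SINGLE_BILL_NUMBER}" in bill_url:
        print("would scrape:", bill_url)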
