Skip to content

Commit

Permalink
CA, OK: Events Fixes (#4959)
Browse files: browse the repository at this point in the history.
* OK: Fix for empty senate events message

* CA: Fix for events with multiple dashes in the time/location string
  • Loading branch information
showerst authored Jun 10, 2024
1 parent 1d067c3 commit 578c2f0
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
4 changes: 3 additions & 1 deletion scrapers/ca/events_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,9 @@ def scrape_lower(self):
'.//div[@class="attribute time-location"]'
)[0].xpath("string()")

hearing_time, hearing_location = time_loc.split(" - ")
time_loc = time_loc.split(" - ")
hearing_time = time_loc[0]
hearing_location = " - ".join(time_loc[1:])
hearing_time = (
hearing_time.replace(".", "").strip(strip_chars)
if ".m." in hearing_time
Expand Down
9 changes: 8 additions & 1 deletion scrapers/ok/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,17 @@ def scrape(self, start=None, end=None):
def scrape_senate(self):
# url = "https://oksenate.gov/committee-meetings"
url = "https://accessible.oksenate.gov/committee-meetings"
page = lxml.html.fromstring(self.get(url).content)
page = self.get(url).content
page = lxml.html.fromstring(page)
page.make_links_absolute(url)

for row in page.xpath("//div[contains(@class,'bTiles__items')]/span"):

if row.xpath(
"//p[contains(text(), 'There are currently no live Committee Meetings in progress')]"
):
continue

event_link = row.xpath(".//a[contains(@class,'bTiles__title')]")[0]
event_title = event_link.xpath("string(.)")
event_url = event_link.xpath("@href")[0]
Expand Down

0 comments on commit 578c2f0

Please sign in to comment.