Skip to content

Commit

Permalink
OH: Events: Add agendas and bills (#4516)
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst authored Dec 4, 2023
1 parent 3a3d38c commit 9864e84
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions scrapers/oh/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,20 @@ class OHEventScraper(Scraper):
_tz = pytz.timezone("US/Eastern")

base_url = "https://www.legislature.ohio.gov/schedules/"
api_base_url = "https://search-prod.lis.state.oh.us/"
session_id = ""

scraper = cloudscraper.create_scraper()
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
}

dedupe_keys = set()

def scrape(self, start=None, end=None):
# use the last session listed in __init__.py (assumed to be the newest) as the session id
self.session_id = self.jurisdiction.legislative_sessions[-1]["identifier"]

if start is None:
start = dt.datetime.today()
else:
Expand All @@ -72,6 +80,7 @@ def scrape(self, start=None, end=None):

url = f"{self.base_url}calendar-data?start={start}&end={end}"
try:
self.info(f"Fetching {url}")
data = json.loads(self.scraper.get(url).content)
except Exception:
raise EmptyScrape
Expand Down Expand Up @@ -137,6 +146,9 @@ def scrape(self, start=None, end=None):
else:
self.dedupe_keys.add(event_key)

if re.match(r"^.*\shearing room", location, flags=re.IGNORECASE):
location = f"{location}, 1 Capitol Square, Columbus, OH 43215"

event = Event(
name=name, start_date=when, location_name=location, status=status
)
Expand All @@ -155,6 +167,25 @@ def scrape(self, start=None, end=None):

event.add_source(url)

# The API has more data on agendas and bills, e.g.:
# https://search-prod.lis.state.oh.us/solarapi/v1/general_assembly_135/notices/cmte_s_health_1/2023-03-01

com_id = re.search(r"\/([a-z_]+\d)", item["url"], flags=re.IGNORECASE)
if com_id:
com_id = com_id.group(1)
hearing_date = when.strftime("%Y-%m-%d")
api_url = f"{self.api_base_url}/solarapi/v1/general_assembly_{self.session_id}/notices/{com_id}/{hearing_date}?format=json"
self.info(f"Fetching {api_url}")
api_data = json.loads(self.scraper.get(api_url).content)
for row in api_data["agenda"]:
item_text = f"{row['headline']} - {row['proposed_sponsor']}"
agenda_item = event.add_agenda_item(item_text)
if "billno" in row:
agenda_item.add_bill(row["billno"])

# Note: the API response also has a 'testimonies' element, but it appears to contain
# only witness registration forms from the bill sponsors, so we skip it.

event_count += 1

yield event
Expand Down

0 comments on commit 9864e84

Please sign in to comment.