Skip to content

Commit

Permalink
Fix small issue: use a shared BASE_URL constant for ILGA URLs and match any "div_" bill-list container
Browse files (browse the repository at this point in the history)
  • Loading branch information
braykuka committed Dec 10, 2024
1 parent 5279c5c commit e0190f8
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 12 deletions.
14 changes: 8 additions & 6 deletions scrapers/il/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from openstates.utils import convert_pdf


BASE_URL = "https://beta.ilga.gov"
central = pytz.timezone("US/Central")


Expand Down Expand Up @@ -239,7 +240,7 @@
}

DUPE_VOTES = {
"https://ilga.gov/legislation/votehistory/100/house/committeevotes/"
f"{BASE_URL}/legislation/votehistory/100/house/committeevotes/"
"10000HB2457_16401.pdf"
}

Expand Down Expand Up @@ -279,12 +280,13 @@ def chamber_slug(chamber):


class IlBillScraper(Scraper):
LEGISLATION_URL = "https://beta.ilga.gov/Legislation/"
LEGISLATION_URL = f"{BASE_URL}/Legislation/"
localize = pytz.timezone("America/Chicago").localize

def get_bill_urls(self, chamber, session, doc_type):
params = session_details[session]["params"]
url = "https://beta.ilga.gov/Legislation/RegularSession/{}?SessionId={}".format(
url = "{}/Legislation/RegularSession/{}?SessionId={}".format(
BASE_URL,
doc_type,
params["SessionId"],
)
Expand All @@ -293,7 +295,7 @@ def get_bill_urls(self, chamber, session, doc_type):
doc.make_links_absolute(url)

for bill_url in doc.xpath(
'//div[@id="div_0001"]//table//td[1]/a[contains(@href, "DocNum=")]/@href'
'//div[contains(@id,"div_")]//table//td[1]/a[contains(@href, "DocNum=")]/@href'
):
yield bill_url

Expand Down Expand Up @@ -326,7 +328,7 @@ def scrape(self, session=None):

def scrape_archive_bills(self, session):
session_abr = session[0:2]
url = f"https://beta.ilga.gov/documents/legislation/legisnet{session_abr}/{session_abr}gatoc.html"
url = f"{BASE_URL}/documents/legislation/legisnet{session_abr}/{session_abr}gatoc.html"
html = self.get(url).text
doc = lxml.html.fromstring(html)
doc.make_links_absolute(url)
Expand Down Expand Up @@ -616,7 +618,7 @@ def scrape_documents(self, bill, version_url):
)
)
# if it's html, extract the pdf link too while we're here.
pdf_url = f"https://beta.ilga.gov/documents/legislation/{session_number}/{doctype}/PDF/{version_id}.pdf"
pdf_url = f"{BASE_URL}/documents/legislation/{session_number}/{doctype}/PDF/{version_id}.pdf"
bill.add_version_link(name, pdf_url, media_type="application/pdf")

bill.add_version_link(name, url, media_type=mimetype)
Expand Down
12 changes: 6 additions & 6 deletions scrapers/il/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@

import pytz

BASE_URL = "https://beta.ilga.gov"
urls = {
"upper": "https://beta.ilga.gov/Senate/Schedules",
"lower": "https://beta.ilga.gov/House/Schedules",
"upper": f"{BASE_URL}/Senate/Schedules",
"lower": f"{BASE_URL}/House/Schedules",
}

chamber_names = {
Expand Down Expand Up @@ -72,7 +73,6 @@ def scrape_page(self, url, chamber):
repl = {"AM": " AM", "PM": " PM"} # Space shim.
for r in repl:
datetime = datetime.replace(r, repl[r])
# datetime = self.localize(dt.datetime.strptime(datetime, "%b %d, %Y %I:%M %p"))
datetime = self.localize(dt.datetime.strptime(datetime, "%m/%d/%Y %I:%M %p"))

event_name = f"{description}#{where}#{datetime}"
Expand Down Expand Up @@ -125,9 +125,9 @@ def scrape(self):
for table in tables:
meetings = table.xpath(".//button")
for meeting in meetings:
meeting_url = "https://beta.ilga.gov" + meeting.attrib[
"onclick"
].replace("location.href=", "").strip("'. ")
meeting_url = BASE_URL + meeting.attrib["onclick"].replace(
"location.href=", ""
).strip("'. ")
event, name = self.scrape_page(meeting_url, chamber_names[chamber])
if event and name:
if name in events:
Expand Down

0 comments on commit e0190f8

Please sign in to comment.