From 4e5a263708a781db9fcf93778dd347fe54de2d74 Mon Sep 17 00:00:00 2001
From: braykuka
Date: Mon, 9 Dec 2024 15:15:30 +0100
Subject: [PATCH 1/4] IL: Bill scraper rewrite to new beta site

---
 scrapers/il/__init__.py |  78 +++++++++++---------
 scrapers/il/bills.py    | 154 ++++++++++++++++++++--------------------
 scrapers/il/events.py   |  71 ++++++++++--------
 3 files changed, 165 insertions(+), 138 deletions(-)

diff --git a/scrapers/il/__init__.py b/scrapers/il/__init__.py
index ed5889785b..2d3aa32a76 100644
--- a/scrapers/il/__init__.py
+++ b/scrapers/il/__init__.py
@@ -1,4 +1,6 @@
 # encoding=utf-8
+from urllib import response
+import requests
 from utils import url_xpath
 from openstates.scrape import State
 from .bills import IlBillScraper
@@ -6,16 +8,13 @@


 class Illinois(State):
-    scrapers = {
-        "bills": IlBillScraper,
-        "events": IlEventScraper,
-    }
+    scrapers = {"bills": IlBillScraper, "events": IlEventScraper}
     legislative_sessions = [
         {
             "name": "90th Regular Session",
             "identifier": "90th",
             "classification": "primary",
-            "_scraped_name": "90 (1997-1998)",
+            "_scraped_name": "90th General Assembly (1997-1998)",
             "start_date": "1997-01-08",
             "end_date": "1999-01-12",
         },
@@ -23,7 +22,7 @@ class Illinois(State):
             "name": "91st Regular Session",
             "identifier": "91st",
             "classification": "primary",
-            "_scraped_name": "91 (1999-2000)",
+            "_scraped_name": "91st General Assembly (1999-2000)",
             "start_date": "1999-01-13",
             "end_date": "2001-01-09",
         },
@@ -31,7 +30,7 @@ class Illinois(State):
             "name": "92nd Regular Session",
             "identifier": "92nd",
             "classification": "primary",
-            "_scraped_name": "92 (2001-2002)",
+            "_scraped_name": "92nd General Assembly (2001-2002)",
             "start_date": "2001-01-10",
             "end_date": "2003-01-07",
         },
@@ -39,7 +38,7 @@ class Illinois(State):
             "name": "93rd Regular Session",
             "identifier": "93rd",
             "classification": "primary",
-            "_scraped_name": "93 (2003-2004)",
+            "_scraped_name": "93rd General Assembly (2003-2004)",
             "start_date": "2003-01-08",
             "end_date": "2005-01-11",
         },
@@ -54,7 +53,7 @@ class Illinois(State):
             "name": "94th Regular Session",
             "identifier": "94th",
             "classification": "primary",
-            "_scraped_name": "94 (2005-2006)",
+            "_scraped_name": "94th General Assembly (2005-2006)",
             "start_date": "2005-01-12",
             "end_date": "2007-01-09",
         },
@@ -62,7 +61,7 @@ class Illinois(State):
             "name": "95th Regular Session",
             "identifier": "95th",
             "classification": "primary",
-            "_scraped_name": "95 (2007-2008)",
+            "_scraped_name": "95th General Assembly (2007-2008)",
             "start_date": "2007-01-10",
             "end_date": "2009-01-13",
         },
@@ -77,7 +76,7 @@ class Illinois(State):
             "name": "96th Regular Session",
             "identifier": "96th",
             "classification": "primary",
-            "_scraped_name": "96 (2009-2010)",
+            "_scraped_name": "96th General Assembly (2009-2010)",
             "start_date": "2009-01-14",
             "end_date": "2011-01-11",
         },
@@ -92,7 +91,7 @@ class Illinois(State):
             "name": "97th Regular Session",
             "identifier": "97th",
             "classification": "primary",
-            "_scraped_name": "97 (2011-2012)",
+            "_scraped_name": "97th General Assembly (2011-2012)",
             "start_date": "2011-01-12",
             "end_date": "2013-01-08",
         },
@@ -100,7 +99,7 @@ class Illinois(State):
             "name": "98th Regular Session",
             "identifier": "98th",
             "classification": "primary",
-            "_scraped_name": "98 (2013-2014)",
+            "_scraped_name": "98th General Assembly (2013-2014)",
             "start_date": "2013-01-09",
             "end_date": "2015-01-13",
         },
@@ -108,7 +107,7 @@ class Illinois(State):
             "name": "99th Regular Session",
             "identifier": "99th",
             "classification": "primary",
-            "_scraped_name": "99 (2015-2016)",
+            "_scraped_name": "99th General Assembly (2015-2016)",
"99th General Assembly (2015-2016)", "start_date": "2015-01-14", "end_date": "2017-01-10", }, @@ -116,7 +115,7 @@ class Illinois(State): "name": "100th Special Session", "identifier": "100th-special", "classification": "special", - "_scraped_name": "100 (2017-2018)", + "_scraped_name": "100th General Assembly (2017-2018)", "start_date": "2017-06-21", "end_date": "2017-06-21", }, @@ -133,12 +132,12 @@ class Illinois(State): "start_date": "2019-01-09", "end_date": "2019-12-14", "classification": "primary", - "_scraped_name": "101 (2019-2020)", + "_scraped_name": "101st General Assembly (2019-2020)", }, # Leave this on until 2023-01-31, # IL has a history post-session governor actions { - "_scraped_name": "102 (2021-2022)", + "_scraped_name": "102nd General Assembly (2021-2022)", "name": "102nd Regular Session", "identifier": "102nd", "start_date": "2021-01-13", @@ -148,7 +147,16 @@ class Illinois(State): }, # check senate prez in session_details in bills.py # https://www.ilga.gov/house/schedules/2024_House_Spring_Session.pdf - { + # { + # "name": "103rd Regular Session", + # "identifier": "103rd", + # "start_date": "2023-01-11", + # "end_date": "2024-05-24", + # "classification": "primary", + # "active": False, + # }, + { + "_scraped_name": "103rd General Assembly (2023-2024)", "name": "103rd Regular Session", "identifier": "103rd", "start_date": "2023-01-11", @@ -159,20 +167,26 @@ class Illinois(State): ] ignored_scraped_sessions = [ - "77 (1971-1972)", - "78 (1973-1974)", - "79 (1975-1976)", - "80 (1977-1978)", - "81 (1979-1980)", - "82 (1981-1982)", - "83 (1983-1984)", - "84 (1985-1986)", - "85 (1987-1988)", - "86 (1989-1990)", - "87 (1991-1992)", - "88 (1993-1994)", - "89 (1995-1996)", + "89th General Assembly (1995-1996)", + "88th General Assembly (1993-1994)", + "87th General Assembly (1991-1992)", + "86th General Assembly (1989-1990)", + "85th General Assembly (1987-1988)", + "84th General Assembly (1985-1986)", + "83rd General Assembly (1983-1984)", + "82nd General Assembly (1981-1982)", + "81st General Assembly (1979-1980)", + "80th General Assembly (1977-1978)", + "79th General Assembly (1975-1976)", + "78th General Assembly (1973-1974)", + "77th General Assembly (1971-1972)", ] def get_session_list(self): - return url_xpath("https://ilga.gov/PreviousGA.asp", "//option/text()") + response = requests.get( + "https://beta.ilga.gov/API/Legislation/GetGeneralAssemblies" + ) + response.raise_for_status() + session_list = [ga["gaLabel"] for ga in response.json()] + + return session_list diff --git a/scrapers/il/bills.py b/scrapers/il/bills.py index b45d261ed1..926dced675 100644 --- a/scrapers/il/bills.py +++ b/scrapers/il/bills.py @@ -279,19 +279,22 @@ def chamber_slug(chamber): class IlBillScraper(Scraper): - LEGISLATION_URL = "https://ilga.gov/legislation/grplist.asp" + LEGISLATION_URL = "https://beta.ilga.gov/Legislation/" localize = pytz.timezone("America/Chicago").localize def get_bill_urls(self, chamber, session, doc_type): params = session_details[session]["params"] - params["num1"] = "1" - params["num2"] = "10000" - params["DocTypeID"] = doc_type - html = self.get(self.LEGISLATION_URL, params=params).text + url = "https://beta.ilga.gov/Legislation/RegularSession/{}?SessionId={}".format( + doc_type, + params["SessionId"], + ) + html = self.get(url).text doc = lxml.html.fromstring(html) - doc.make_links_absolute(self.LEGISLATION_URL) + doc.make_links_absolute(url) - for bill_url in doc.xpath("//li/a/@href"): + for bill_url in doc.xpath( + 
'//div[@id="div_0001"]//table//td[1]/a[contains(@href, "DocNum=")]/@href' + ): yield bill_url def scrape(self, session=None): @@ -321,16 +324,9 @@ def scrape(self, session=None): chamber, session_id, "AM", bill_url, "appointment" ) - # TODO: get joint session resolution added to python-opencivicdata - # for bill_url in self.get_bill_urls(chamber, session_id, 'JSR'): - # bill, votes = self.scrape_bill(chamber, session_id, 'JSR', bill_url, - # 'joint session resolution') - # yield bill - # yield from votes - def scrape_archive_bills(self, session): session_abr = session[0:2] - url = f"https://www.ilga.gov/legislation/legisnet{session_abr}/{session_abr}gatoc.html" + url = f"https://beta.ilga.gov/documents/legislation/legisnet{session_abr}/{session_abr}gatoc.html" html = self.get(url).text doc = lxml.html.fromstring(html) doc.make_links_absolute(url) @@ -338,6 +334,7 @@ def scrape_archive_bills(self, session): # Contains multiple bills for bill_numbers_section_url in bill_numbers_sections: + bill_numbers_section_url = clean_archivebill_url(bill_numbers_section_url) bill_section_html = self.get(bill_numbers_section_url).text bill_section_doc = lxml.html.fromstring(bill_section_html) bill_section_doc.make_links_absolute(bill_numbers_section_url) @@ -351,6 +348,7 @@ def scrape_archive_bills(self, session): # Actual Bill Pages for bill_url in bills_urls: + bill_url = clean_archivebill_url(bill_url) bill_html = self.get(bill_url).text bill_doc = lxml.html.fromstring(bill_html) @@ -377,6 +375,7 @@ def scrape_archive_bills(self, session): summary_page_url = bill_doc.xpath( '//a[contains (., "Bill Summary")]/@href' )[0] + summary_page_url = clean_archivebill_url(summary_page_url) summary_page_html = self.get(summary_page_url).text summary_page_doc = lxml.html.fromstring(summary_page_html) summary_page_doc.make_links_absolute(summary_page_url) @@ -387,6 +386,7 @@ def scrape_archive_bills(self, session): bill_url = bill_doc.xpath('//a[contains (., "Bill Status")]/@href')[ 0 ] + bill_url = clean_archivebill_url(bill_url) bill_html = self.get(bill_url).text bill_doc = lxml.html.fromstring(bill_html) bill_doc.make_links_absolute(bill_url) @@ -421,6 +421,7 @@ def scrape_archive_bills(self, session): # Bill version version_url = bill_doc.xpath('//a[contains (., "Full Text")]/@href')[0] + version_url = clean_archivebill_url(version_url) bill.add_version_link(bill_id, version_url, media_type="text/html") # Actions @@ -483,19 +484,15 @@ def scrape_bill(self, chamber, session, doc_type, url, bill_type=None): bill_type = bill_type or DOC_TYPES[doc_type[1:]] bill_id = doc_type + bill_num - title = doc.xpath( - '//span[text()="Short Description:"]/following-sibling::span[1]/' "text()" - )[0].strip() + title = doc.xpath('//div[@id="content"]/div[1]/div/h5/text()')[0].strip() # 1. Find the heading with "Synopsis As Introduced" for text. # 2. Go to the next heading. # 3. Backtrack and grab everything to, but not including, #1. # 4. Grab text of all, including nested, nodes. 
-        summary_nodes = doc.xpath(
-            '//span[text()="Synopsis As Introduced"]/following-sibling::span[contains(@class, "heading2")]/'
-            'preceding-sibling::*[preceding-sibling::span[text()="Synopsis As Introduced"]]//'
+        summary = doc.xpath(
+            '//h5[text()="Synopsis As Introduced"]/../div[@class="list-group"]/span/'
             "text()"
-        )
-        summary = "\n".join([node.strip() for node in summary_nodes])
+        )[0].strip()

         bill = Bill(
             identifier=bill_id,
@@ -509,14 +506,15 @@ def scrape_bill(self, chamber, session, doc_type, url, bill_type=None):
         bill.add_source(url)

         # sponsors
-        sponsor_list = build_sponsor_list(doc.xpath('//a[contains(@class, "content")]'))
+        sponsor_list = build_sponsor_list(
+            doc.xpath('//div[@id="sponsorDiv"]//a[@class="notranslate"]')
+        )
         # don't add just yet; we can make them better using action data

-        # actions
-        action_tds = doc.xpath('//a[@name="actions"]/following-sibling::table[1]/td')
+        action_tds = doc.xpath('//h5[text()="Actions"]/../table//td')
         for date, actor, action_elem in group(action_tds, 3):
             date = datetime.datetime.strptime(date.text_content().strip(), "%m/%d/%Y")
-            date = self.localize(date).date()
+            date = date.date()

             actor = actor.text_content()
             actor_id = "upper" if actor == "Senate" else "lower"
@@ -581,56 +579,54 @@ def scrape_documents(self, bill, version_url):
         if "HTML full text does not exist for this appropriations document" in html:
             pdf_only = True

-        for link in doc.xpath('//a[contains(@href, "fulltext")]'):
-            name = link.text
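+        # Assumption: on the beta document pages, every full-text variant is
+        # an anchor with class "content" inside the main content rows.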
+        for link in doc.xpath(
+            '//div[@id="content"]/div[contains(@class, "row")]//a[contains(@class, "content")]'
+        ):
+            name = link.text_content().strip()
             url = link.get("href")
-            # Ignore the "Printer-friendly version" link
             # That link is a "latest version" alias for an actual, distinct version
-            if "print=true" not in url:
-                if name in VERSION_TYPES or "amendment" in name.lower():
-                    if pdf_only:
-                        # eed to visit the version's page, and get PDF link from there
-                        # otherwise get a faulty "latest version"/"LV" alias/duplicate
-                        version_page_html = self.get(url).text
-                        version_page_doc = lxml.html.fromstring(version_page_html)
-                        version_page_doc.make_links_absolute(url)
-                        pdf_link = version_page_doc.xpath('//a[text()="PDF"]')[0]
-                        url = pdf_link.get("href")
-                        mimetype = "application/pdf"
-                    else:
-                        url = "{}&print=true".format(url)
-                        mimetype = "text/html"
-
-                    version_id = re.search(
-                        r"DocName=(.*?)&", url, flags=re.IGNORECASE
-                    ).group(1)
-                    doctype = re.search(
-                        r"DocTypeId=(.*?)&", url, flags=re.IGNORECASE
-                    ).group(1)
-                    # numeric component of the session id
-                    session_number = int(
-                        "".join(
-                            char
-                            for char in bill.legislative_session
-                            if char.isdigit()
-                        )
-                    )
-
-                    # if it's html, extract the pdf link too while we're here.
-                    pdf_url = f"https://ilga.gov/legislation/{session_number}/{doctype}/PDF/{version_id}.pdf"
-                    bill.add_version_link(
-                        name, pdf_url, media_type="application/pdf"
-                    )
-
-                    bill.add_version_link(name, url, media_type=mimetype)
-                elif name in FULLTEXT_DOCUMENT_TYPES:
-                    bill.add_document_link(name, url)
-                elif "Printer-Friendly" in name:
-                    pass
+            if name in VERSION_TYPES or "amendment" in name.lower():
+                if pdf_only:
+                    # Need to visit the version's page, and get PDF link from there
+                    # otherwise get a faulty "latest version"/"LV" alias/duplicate
+                    url = "{}&Print=1".format(url)
+                    version_page_html = self.get(url).text
+                    version_page_doc = lxml.html.fromstring(version_page_html)
+                    version_page_doc.make_links_absolute(url)
+                    pdf_link = version_page_doc.xpath('//a[contains(@href, "PDF")]')
+                    if not pdf_link:
+                        continue
+                    pdf_link = pdf_link[0]
+                    url = pdf_link.get("href")
+                    mimetype = "application/pdf"
                 else:
-                    self.warning("unknown document type %s - adding as document" % name)
-                    bill.add_document_link(name, url)
+                    url = "{}&Print=1".format(url)
+                    mimetype = "text/html"
+                version_id = re.search(
+                    r"DocName=(.*?)&", url, flags=re.IGNORECASE
+                ).group(1)
+                doctype = re.search(
+                    r"DocTypeId=(.*?)&", url, flags=re.IGNORECASE
+                ).group(1)
+                # numeric component of the session id
+                session_number = int(
+                    "".join(
+                        char for char in bill.legislative_session if char.isdigit()
+                    )
+                )
+                # if it's html, extract the pdf link too while we're here.
+                pdf_url = f"https://beta.ilga.gov/documents/legislation/{session_number}/{doctype}/PDF/{version_id}.pdf"
+                bill.add_version_link(name, pdf_url, media_type="application/pdf")
+
+                bill.add_version_link(name, url, media_type=mimetype)
+            elif name in FULLTEXT_DOCUMENT_TYPES:
+                bill.add_document_link(name, url)
+            elif "Printer-Friendly" in name:
+                pass
+            else:
+                self.warning("unknown document type %s - adding as document" % name)
+                bill.add_document_link(name, url)

     def scrape_votes(self, session, bill, votes_url):
         html = self.get(votes_url).text
@@ -644,7 +640,7 @@ def scrape_votes(self, session, bill, votes_url):
         pieces = link.text.split(" - ")
         date = pieces[-1]

-        vote_type = link.xpath("../ancestor::table[1]//td[1]/text()")[0]
+        vote_type = link.xpath("../a/text()")[0]
         if vote_type == "Committee Hearing Votes":
             chamber = link.xpath("../following-sibling::td/text()")[0]
             actor = "upper" if chamber == "SENATE" else "lower"
@@ -915,9 +911,9 @@ def build_sponsor_list(sponsor_atags):
     spontype = "cosponsor"
     for atag in sponsor_atags:
         sponsor = atag.text
-        if "house" in atag.attrib["href"].split("/"):
+        if "house" in atag.attrib["href"].lower().split("/"):
            chamber = "lower"
-        elif "senate" in atag.attrib["href"].split("/"):
+        elif "senate" in atag.attrib["href"].lower().split("/"):
             chamber = "upper"
         else:
             chamber = None
@@ -934,3 +930,9 @@ def build_sponsor_list(sponsor_atags):
             official_spontype = "cosponsor"  # until replaced
         sponsors.append((spontype, sponsor, chamber, official_spontype))
     return sponsors
+
+
+def clean_archivebill_url(url):
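+    # The archive pages seem to link against the beta site root, while the
+    # legacy legisnet files actually live under /documents/.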
"https://www.ilga.gov/house/schedules/weeklyhearings.asp", + "upper": "https://beta.ilga.gov/Senate/Schedules", + "lower": "https://beta.ilga.gov/House/Schedules", } chamber_names = { @@ -34,12 +36,16 @@ def scrape_page(self, url, chamber): doc = lxml.html.fromstring(html) doc.make_links_absolute(url) - ctty_name = doc.xpath("//span[@class='heading']")[0].text_content() + ctty_name = doc.xpath('//*[@id="main-content"]/section[2]//h2')[ + 0 + ].text_content() # Remove prefixes from the name like "Hearing notice for" ctty_name = ctty_name_re.match(ctty_name).group(4) - tables = doc.xpath("//table[@cellpadding='3']") + tables = doc.xpath( + '//div[contains(@class, "card")][.//h4[contains(., "Hearing Details")]]//table' + ) if not tables: self.warning(f"Empty hearing data for {url}") return False, False @@ -47,12 +53,11 @@ def scrape_page(self, url, chamber): rows = info.xpath(".//tr") metainf = {} for row in rows: - tds = row.xpath(".//td") - key = tds[0].text_content().strip() - value = tds[1].text_content().strip() + tds = "".join(row.xpath(".//td//text()")).split(":") + key = tds[0].strip() + value = ":".join(tds[1:]).strip() metainf[key] = value - - where = metainf["Location:"] + where = metainf["Location"] description = f"{chamber} {ctty_name}" # Remove committee suffix from names @@ -64,12 +69,13 @@ def scrape_page(self, url, chamber): descr_parts = description.split("-") description = " - ".join([x.strip() for x in descr_parts]) - datetime = metainf["Scheduled Date:"] + datetime = metainf["Date"] datetime = re.sub(r"\s+", " ", datetime) repl = {"AM": " AM", "PM": " PM"} # Space shim. for r in repl: datetime = datetime.replace(r, repl[r]) - datetime = self.localize(dt.datetime.strptime(datetime, "%b %d, %Y %I:%M %p")) + # datetime = self.localize(dt.datetime.strptime(datetime, "%b %d, %Y %I:%M %p")) + datetime = self.localize(dt.datetime.strptime(datetime, "%m/%d/%Y %I:%M %p")) event_name = f"{description}#{where}#{datetime}" event = Event(description, start_date=datetime, location_name=where) @@ -78,20 +84,24 @@ def scrape_page(self, url, chamber): event.add_participant(ctty_name, "organization") - bills = tables[1] - for bill in bills.xpath(".//tr")[1:]: - tds = bill.xpath(".//td") - if len(tds) < 4: - continue - # First, let's get the bill ID: - bill_id = tds[0].text_content() - - # Apply correct spacing to bill id - (alpha, num) = bill_re.match(bill_id).groups() - bill_id = f"{alpha} {num}" - - agenda_item = event.add_agenda_item(bill_id) - agenda_item.add_bill(bill_id) + bills = doc.xpath( + '//div[contains(@class, "card")][.//h4[contains(., "Bills Assigned To Hearing")]]//table' + ) + if bills: + bills = bills[0] + for bill in bills.xpath(".//tr")[1:]: + tds = bill.xpath(".//td") + if len(tds) < 4: + continue + # First, let's get the bill ID: + bill_id = tds[0].text_content() + + # Apply correct spacing to bill id + (alpha, num) = bill_re.match(bill_id).groups() + bill_id = f"{alpha} {num}" + + agenda_item = event.add_agenda_item(bill_id) + agenda_item.add_bill(bill_id) return event, event_name @@ -112,14 +122,15 @@ def scrape(self): no_scheduled_ct += 1 continue - tables = doc.xpath("//table[@width='550']") + tables = doc.xpath('//*[@id="pane-Week"]//table//tr') events = set() for table in tables: - meetings = table.xpath(".//a") + meetings = table.xpath(".//button") for meeting in meetings: - event, name = self.scrape_page( - meeting.attrib["href"], chamber_names[chamber] - ) + meeting_url = "https://beta.ilga.gov" + meeting.attrib[ + "onclick" + ].replace("location.href=", 
"").strip("'. ") + event, name = self.scrape_page(meeting_url, chamber_names[chamber]) if event and name: if name in events: self.warning(f"Duplicate event {name}") From 5279c5c7ee5138d0eb622f0beac3e3b4e7ec6496 Mon Sep 17 00:00:00 2001 From: braykuka Date: Mon, 9 Dec 2024 15:34:53 +0100 Subject: [PATCH 2/4] fix: lint issue --- scrapers/il/__init__.py | 2 -- scrapers/il/events.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/scrapers/il/__init__.py b/scrapers/il/__init__.py index 2d3aa32a76..0d5cd5f278 100644 --- a/scrapers/il/__init__.py +++ b/scrapers/il/__init__.py @@ -1,7 +1,5 @@ # encoding=utf-8 -from urllib import response import requests -from utils import url_xpath from openstates.scrape import State from .bills import IlBillScraper from .events import IlEventScraper diff --git a/scrapers/il/events.py b/scrapers/il/events.py index be949c2da3..22b36db838 100644 --- a/scrapers/il/events.py +++ b/scrapers/il/events.py @@ -1,6 +1,4 @@ -from calendar import month import datetime as dt -import json import lxml import re From e0190f88d567dcbe3668c54d9fb3912b94c0f947 Mon Sep 17 00:00:00 2001 From: braykuka Date: Tue, 10 Dec 2024 21:14:33 +0100 Subject: [PATCH 3/4] Fix small issue --- scrapers/il/bills.py | 14 ++++++++------ scrapers/il/events.py | 12 ++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/scrapers/il/bills.py b/scrapers/il/bills.py index 926dced675..c2168f500c 100644 --- a/scrapers/il/bills.py +++ b/scrapers/il/bills.py @@ -9,6 +9,7 @@ from openstates.utils import convert_pdf +BASE_URL = "https://beta.ilga.gov" central = pytz.timezone("US/Central") @@ -239,7 +240,7 @@ } DUPE_VOTES = { - "https://ilga.gov/legislation/votehistory/100/house/committeevotes/" + f"{BASE_URL}/legislation/votehistory/100/house/committeevotes/" "10000HB2457_16401.pdf" } @@ -279,12 +280,13 @@ def chamber_slug(chamber): class IlBillScraper(Scraper): - LEGISLATION_URL = "https://beta.ilga.gov/Legislation/" + LEGISLATION_URL = f"{BASE_URL}/Legislation/" localize = pytz.timezone("America/Chicago").localize def get_bill_urls(self, chamber, session, doc_type): params = session_details[session]["params"] - url = "https://beta.ilga.gov/Legislation/RegularSession/{}?SessionId={}".format( + url = "{}/Legislation/RegularSession/{}?SessionId={}".format( + BASE_URL, doc_type, params["SessionId"], ) @@ -293,7 +295,7 @@ def get_bill_urls(self, chamber, session, doc_type): doc.make_links_absolute(url) for bill_url in doc.xpath( - '//div[@id="div_0001"]//table//td[1]/a[contains(@href, "DocNum=")]/@href' + '//div[contains(@id,"div_")]//table//td[1]/a[contains(@href, "DocNum=")]/@href' ): yield bill_url @@ -326,7 +328,7 @@ def scrape(self, session=None): def scrape_archive_bills(self, session): session_abr = session[0:2] - url = f"https://beta.ilga.gov/documents/legislation/legisnet{session_abr}/{session_abr}gatoc.html" + url = f"{BASE_URL}/documents/legislation/legisnet{session_abr}/{session_abr}gatoc.html" html = self.get(url).text doc = lxml.html.fromstring(html) doc.make_links_absolute(url) @@ -616,7 +618,7 @@ def scrape_documents(self, bill, version_url): ) ) # if it's html, extract the pdf link too while we're here. 
-                pdf_url = f"https://beta.ilga.gov/documents/legislation/{session_number}/{doctype}/PDF/{version_id}.pdf"
+                pdf_url = f"{BASE_URL}/documents/legislation/{session_number}/{doctype}/PDF/{version_id}.pdf"
                 bill.add_version_link(name, pdf_url, media_type="application/pdf")

                 bill.add_version_link(name, url, media_type=mimetype)

diff --git a/scrapers/il/events.py b/scrapers/il/events.py
index 22b36db838..4820f30b06 100644
--- a/scrapers/il/events.py
+++ b/scrapers/il/events.py
@@ -7,9 +7,10 @@

 import pytz

+BASE_URL = "https://beta.ilga.gov"
 urls = {
-    "upper": "https://beta.ilga.gov/Senate/Schedules",
-    "lower": "https://beta.ilga.gov/House/Schedules",
+    "upper": f"{BASE_URL}/Senate/Schedules",
+    "lower": f"{BASE_URL}/House/Schedules",
 }

 chamber_names = {
@@ -72,7 +73,6 @@ def scrape_page(self, url, chamber):
         repl = {"AM": " AM", "PM": " PM"}  # Space shim.
         for r in repl:
             datetime = datetime.replace(r, repl[r])
-        # datetime = self.localize(dt.datetime.strptime(datetime, "%b %d, %Y %I:%M %p"))
         datetime = self.localize(dt.datetime.strptime(datetime, "%m/%d/%Y %I:%M %p"))

         event_name = f"{description}#{where}#{datetime}"
@@ -125,9 +125,9 @@ def scrape(self):
             events = set()
             for table in tables:
                 meetings = table.xpath(".//button")
                 for meeting in meetings:
-                    meeting_url = "https://beta.ilga.gov" + meeting.attrib[
-                        "onclick"
-                    ].replace("location.href=", "").strip("'. ")
+                    meeting_url = BASE_URL + meeting.attrib["onclick"].replace(
+                        "location.href=", ""
+                    ).strip("'. ")
                     event, name = self.scrape_page(meeting_url, chamber_names[chamber])
                     if event and name:
                         if name in events:

From 6437443edffadf062492146a51fbc80a80abaec0 Mon Sep 17 00:00:00 2001
From: braykuka
Date: Fri, 13 Dec 2024 16:49:48 +0100
Subject: [PATCH 4/4] fix: no abstract issue

---
 scrapers/il/bills.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scrapers/il/bills.py b/scrapers/il/bills.py
index c2168f500c..916c0c3fad 100644
--- a/scrapers/il/bills.py
+++ b/scrapers/il/bills.py
@@ -504,7 +504,8 @@ def scrape_bill(self, chamber, session, doc_type, url, bill_type=None):
             chamber=chamber,
         )

-        bill.add_abstract(summary, note="")
+        if summary:
+            bill.add_abstract(summary, note="")
         bill.add_source(url)

         # sponsors