From cd9efafb84294ff577c379b5c2c39754dce1f5fd Mon Sep 17 00:00:00 2001 From: NewAgeAirbender <34139325+NewAgeAirbender@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:20:02 -0500 Subject: [PATCH 1/2] IN: update to https --- scrapers/in/bills.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scrapers/in/bills.py b/scrapers/in/bills.py index 6ef137a601..03509326d9 100644 --- a/scrapers/in/bills.py +++ b/scrapers/in/bills.py @@ -15,7 +15,7 @@ settings = dict(SCRAPELIB_TIMEOUT=600) -PROXY_BASE_URL = "http://in-proxy.openstates.org" +PROXY_BASE_URL = "https://in-proxy.openstates.org/" SCRAPE_WEB_VERSIONS = "INDIANA_SCRAPE_WEB_VERSIONS" in os.environ @@ -53,7 +53,7 @@ def _add_sponsor_if_not_blank(self, bill, sponsor, classification): def _get_bill_url(self, session, bill_id): bill_prefix, bill_number = self._get_bill_id_components(bill_id) - url_template = "http://iga.in.gov/legislative/{}/{}/{}" + url_template = "https://iga.in.gov/legislative/{}/{}/{}" try: url_segment = self._bill_prefix_map[bill_prefix]["url_segment"] @@ -253,8 +253,8 @@ def deal_with_version(self, version, bill, bill_id, chamber, session): def scrape_web_versions(self, session, bill, bill_id): # found via web inspector of the requests to - # http://iga.in.gov/documents/{doc_id} - # the web url for downloading a doc is http://iga.in.gov/documents/{doc_id}/download + # https://iga.in.gov/documents/{doc_id} + # the web url for downloading a doc is https://iga.in.gov/documents/{doc_id}/download # where doc_id is the data-myiga-actiondata attribute of the link # this id isn't available in the API, so we have to scrape it @@ -277,7 +277,7 @@ def scrape_web_versions(self, session, bill, bill_id): version_name = link.xpath("@title")[0] # found via web inspector of the requests to # http://iga.in.gov/documents/{doc_id} - download_link = f"http://iga.in.gov/documents/{doc_id}/download" + download_link = f"https://iga.in.gov/documents/{doc_id}/download" bill.add_version_link( version_name, download_link, @@ -292,7 +292,7 @@ def scrape_web_versions(self, session, bill, bill_id): doc_id = link.xpath("@data-myiga-actiondata")[0] document_title = link.xpath("div[1]/text()")[0].strip() document_name = "{} {}".format(version_name, document_title) - download_link = f"http://iga.in.gov/documents/{doc_id}/download" + download_link = f"https://iga.in.gov/documents/{doc_id}/download" bill.add_document_link( document_name, download_link, @@ -307,7 +307,7 @@ def scrape_web_versions(self, session, bill, bill_id): doc_id = link.xpath("@data-myiga-actiondata")[0] document_title = link.xpath("div[1]/text()")[0].strip() document_name = "{} {}".format(version_name, document_title) - download_link = f"http://iga.in.gov/documents/{doc_id}/download" + download_link = f"https://iga.in.gov/documents/{doc_id}/download" # If an amendment has passed, add it as a version, otherwise as a document if "passed" in document_title.lower(): bill.add_version_link( From 79b0667ba41d7c74b91cbc93c8fb9d64ae1096f3 Mon Sep 17 00:00:00 2001 From: NewAgeAirbender <34139325+NewAgeAirbender@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:24:10 -0500 Subject: [PATCH 2/2] IN: check for long subject --- scrapers/in/bills.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scrapers/in/bills.py b/scrapers/in/bills.py index 03509326d9..96f3608149 100644 --- a/scrapers/in/bills.py +++ b/scrapers/in/bills.py @@ -512,6 +512,11 @@ def scrape(self, session=None): # subjects subjects = [s["entry"] for s in bill_json["latestVersion"]["subjects"]] for subject in subjects: + subject = ( + subject + if not subject.startswith("PENSIONS AND RETIREMENT BENEFITS") + else "PENSIONS AND RETIREMENT BENEFITS; Public Retirement System (INPRS)" + ) bill.add_subject(subject) # Abstract