Skip to content

Commit

Permalink
Merge pull request #5021 from NewAgeAirbender/in_bills
Browse files — browse the repository at this point in the history
IN: Shorten Long Bill Subject & Update to HTTPS
  • Loading branch information
NewAgeAirbender authored Sep 4, 2024
2 parents ab6da77 + 79b0667 commit 6770e22
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions scrapers/in/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

settings = dict(SCRAPELIB_TIMEOUT=600)

PROXY_BASE_URL = "http://in-proxy.openstates.org"
PROXY_BASE_URL = "https://in-proxy.openstates.org/"
SCRAPE_WEB_VERSIONS = "INDIANA_SCRAPE_WEB_VERSIONS" in os.environ


Expand Down Expand Up @@ -53,7 +53,7 @@ def _add_sponsor_if_not_blank(self, bill, sponsor, classification):
def _get_bill_url(self, session, bill_id):
bill_prefix, bill_number = self._get_bill_id_components(bill_id)

url_template = "http://iga.in.gov/legislative/{}/{}/{}"
url_template = "https://iga.in.gov/legislative/{}/{}/{}"

try:
url_segment = self._bill_prefix_map[bill_prefix]["url_segment"]
Expand Down Expand Up @@ -253,8 +253,8 @@ def deal_with_version(self, version, bill, bill_id, chamber, session):

def scrape_web_versions(self, session, bill, bill_id):
# found via web inspector of the requests to
# http://iga.in.gov/documents/{doc_id}
# the web url for downloading a doc is http://iga.in.gov/documents/{doc_id}/download
# https://iga.in.gov/documents/{doc_id}
# the web url for downloading a doc is https://iga.in.gov/documents/{doc_id}/download
# where doc_id is the data-myiga-actiondata attribute of the link
# this id isn't available in the API, so we have to scrape it

Expand All @@ -277,7 +277,7 @@ def scrape_web_versions(self, session, bill, bill_id):
version_name = link.xpath("@title")[0]
# found via web inspector of the requests to
# https://iga.in.gov/documents/{doc_id}
download_link = f"http://iga.in.gov/documents/{doc_id}/download"
download_link = f"https://iga.in.gov/documents/{doc_id}/download"
bill.add_version_link(
version_name,
download_link,
Expand All @@ -292,7 +292,7 @@ def scrape_web_versions(self, session, bill, bill_id):
doc_id = link.xpath("@data-myiga-actiondata")[0]
document_title = link.xpath("div[1]/text()")[0].strip()
document_name = "{} {}".format(version_name, document_title)
download_link = f"http://iga.in.gov/documents/{doc_id}/download"
download_link = f"https://iga.in.gov/documents/{doc_id}/download"
bill.add_document_link(
document_name,
download_link,
Expand All @@ -307,7 +307,7 @@ def scrape_web_versions(self, session, bill, bill_id):
doc_id = link.xpath("@data-myiga-actiondata")[0]
document_title = link.xpath("div[1]/text()")[0].strip()
document_name = "{} {}".format(version_name, document_title)
download_link = f"http://iga.in.gov/documents/{doc_id}/download"
download_link = f"https://iga.in.gov/documents/{doc_id}/download"
# If an amendment has passed, add it as a version, otherwise as a document
if "passed" in document_title.lower():
bill.add_version_link(
Expand Down Expand Up @@ -512,6 +512,11 @@ def scrape(self, session=None):
# subjects
subjects = [s["entry"] for s in bill_json["latestVersion"]["subjects"]]
for subject in subjects:
subject = (
subject
if not subject.startswith("PENSIONS AND RETIREMENT BENEFITS")
else "PENSIONS AND RETIREMENT BENEFITS; Public Retirement System (INPRS)"
)
bill.add_subject(subject)

# Abstract
Expand Down

0 comments on commit 6770e22

Please sign in to comment.