Skip to content

Commit

Permalink
Merge pull request #4954 from chrisyamas/fl_bills
Browse files (browse the repository at this point in the history)
FL: add headers for bill page request
  • Loading branch information
NewAgeAirbender authored May 20, 2024
2 parents ad668c4 + 7dc0436 commit 8977285
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions scrapers/fl/bills.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,7 +38,6 @@ def process_page(self):
if a line contains (H|S)(\\d+) that bill gets current subject
"""
subjects = defaultdict(set)

SUBJ_RE = re.compile("^[A-Z ,()]+$")
BILL_RE = re.compile(r"[HS]\d+(?:-[A-Z])?")

Expand Down Expand Up @@ -272,7 +271,12 @@ def process_analysis(self):
if date:
name += " (%s)" % date
analysis_url = tr.xpath("td/a")[0].attrib["href"]
self.input.add_document_link(name, analysis_url, on_duplicate="ignore")
self.input.add_document_link(
name,
analysis_url,
media_type="application/pdf",
on_duplicate="ignore",
)
except IndexError:
self.logger.warning(
"No analysis table for {}".format(self.input.identifier)
Expand Down Expand Up @@ -639,7 +643,15 @@ def get_source_from_input(self):
}[self.input.legislative_session]

form = {"Chamber": "B", "SessionId": session_number, "BillNumber": bill_number}
return url + "?" + urlencode(form)
return URL(
url + "?" + urlencode(form),
method="GET",
headers={
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Host": "www.myfloridahouse.gov",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
},
)

def process_item(self, item):
return HouseBillPage(self.input, source=item)
Expand Down

0 comments on commit 8977285

Please sign in to comment.