From 309a0866811572b40e524f68232c79ad57e0acd0 Mon Sep 17 00:00:00 2001 From: showerst Date: Mon, 20 Nov 2023 16:35:37 -0500 Subject: [PATCH] GU: Bills: fixes for chamber, versions (#4727) --- scrapers/gu/bills.py | 66 +++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/scrapers/gu/bills.py b/scrapers/gu/bills.py index 04ecec15e5..a74b3dc454 100644 --- a/scrapers/gu/bills.py +++ b/scrapers/gu/bills.py @@ -108,22 +108,26 @@ def _process_bill(self, session: str, bill: str, root_url: str): bill_obj.add_source(url=bill_link, note="Bill Introduced") details = self._get_bill_details(bill_link) if details.get("IntroducedDate", None): - bill_obj.add_action("Introduced", details["IntroducedDate"]) + bill_obj.add_action( + "Introduced", details["IntroducedDate"], chamber="legislature" + ) if details.get("ReferredDate", None): if details["Committee"]: bill_obj.add_action( "Referred To Committee", details["ReferredDate"], - organization=details["Committee"], + chamber="legislature", ) else: bill_obj.add_action( - "Referred To Committee", details["ReferredDate"] + "Referred To Committee", + details["ReferredDate"], + chamber="legislature", ) yield bill_obj else: - bill_obj.add_document_link( + bill_obj.add_version_link( url=bill_link, note="Bill Introduced", media_type="application/pdf" ) status = xml.xpath("//li")[0].xpath("a/@href")[0] @@ -156,24 +160,39 @@ def _process_bill(self, session: str, bill: str, root_url: str): for link in xml.xpath("//li")[1:]: url = link.xpath("a/@href")[0] title = link.xpath("a")[0].text - bill_obj.add_document_link( - url=url, note=title, media_type="application/pdf" - ) + if "fiscal note" in title.lower(): + bill_obj.add_document_link( + url=url, + note=title, + media_type="application/pdf", + on_duplicate="ignore", + ) + else: + bill_obj.add_version_link( + url=url, + note=title, + media_type="application/pdf", + on_duplicate="ignore", + ) # status PDF has introduced/passed/etc. dates details = self._get_bill_details(status) if details.get("IntroducedDate", None): - bill_obj.add_action("Introduced", details["IntroducedDate"]) + bill_obj.add_action( + "Introduced", details["IntroducedDate"], chamber="legislature" + ) if details.get("ReferredDate", None): if details["Committee"]: bill_obj.add_action( "Referred To Committee", details["ReferredDate"], - organization=details["Committee"], + chamber="legislature", ) else: bill_obj.add_action( - "Referred To Committee", details["ReferredDate"] + "Referred To Committee", + details["ReferredDate"], + chamber="legislature", ) yield bill_obj @@ -215,7 +234,7 @@ def _process_resolution(self, session: str, bill: str, root_url: str): result_date = self._tz.localize(dateutil.parser.parse(result_data[1])) if result and result_date: - bill_obj.add_action(result, result_date) + bill_obj.add_action(result, result_date, chamber="legislature") bill_obj.add_sponsorship( name=sponsors[0], @@ -233,15 +252,30 @@ def _process_resolution(self, session: str, bill: str, root_url: str): for link in xml.xpath("//li"): url = link.xpath("a/@href")[0] title = link.xpath("a")[0].text - bill_obj.add_document_link( - url=url, note=title, media_type="application/pdf" - ) + if "fiscal note" in title.lower(): + bill_obj.add_document_link( + url=url, + note=title, + media_type="application/pdf", + on_duplicate="ignore", + ) + else: + bill_obj.add_version_link( + url=url, + note=title, + media_type="application/pdf", + on_duplicate="ignore", + ) details = self._get_resolution_details(bill_link) if details.get("IntroducedDate", None): - bill_obj.add_action("Introduced", details["IntroducedDate"]) + bill_obj.add_action( + "Introduced", details["IntroducedDate"], chamber="legislature" + ) if details.get("PresentationDate", None): - bill_obj.add_action("Presented", details["PresentationDate"]) + bill_obj.add_action( + "Presented", details["PresentationDate"], chamber="legislature" + ) yield bill_obj def scrape(self, session):