From 2a42be9a2c57a286fca3c2c10b6d2e0ec4f45a43 Mon Sep 17 00:00:00 2001 From: NewAgeAirbender <34139325+NewAgeAirbender@users.noreply.github.com> Date: Wed, 12 Jun 2024 13:13:34 -0500 Subject: [PATCH] IL: temp remove committee parsing & votes --- scrapers/il/bills.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/scrapers/il/bills.py b/scrapers/il/bills.py index ea36df99d1..0cd2f2cbb3 100644 --- a/scrapers/il/bills.py +++ b/scrapers/il/bills.py @@ -7,7 +7,6 @@ import lxml.html from openstates.scrape import Scraper, Bill, VoteEvent from openstates.utils import convert_pdf -from ._utils import canonicalize_url central = pytz.timezone("US/Central") @@ -503,7 +502,7 @@ def scrape_bill(self, chamber, session, doc_type, url, bill_type=None): sponsor_list = build_sponsor_list(doc.xpath('//a[contains(@class, "content")]')) # don't add just yet; we can make them better using action data - committee_actors = {} + # committee_actors = {} # actions action_tds = doc.xpath('//a[@name="actions"]/following-sibling::table[1]/td') @@ -519,18 +518,19 @@ def scrape_bill(self, chamber, session, doc_type, url, bill_type=None): action = action_elem.text_content() classification, related_orgs = _categorize_action(action) - if related_orgs and any(c.startswith("committee") for c in classification): - try: - ((name, source),) = [ - (a.text, a.get("href")) - for a in action_elem.xpath("a") - if "committee" in a.get("href") - ] - source = canonicalize_url(source) - actor_id = {"sources__url": source, "classification": "committee"} - committee_actors[source] = name - except ValueError: - self.warning("Can't resolve voting body for %s" % classification) + # TODO: add as related_entity not actor + # if related_orgs and any(c.startswith("committee") for c in classification): + # try: + # ((name, source),) = [ + # (a.text, a.get("href")) + # for a in action_elem.xpath("a") + # if "committee" in a.get("href") + # ] + # source = canonicalize_url(source) # actor_id = {"sources__url": source, "classification": "committee"} # committee_actors[source] = name # except ValueError: # self.warning("Can't resolve voting body for %s" % classification) bill.add_action( action, @@ -562,8 +562,8 @@ def scrape_bill(self, chamber, session, doc_type, url, bill_type=None): yield bill # temporarily remove vote processing due to pdf issues - votes_url = doc.xpath('//a[text()="Votes"]/@href')[0] - yield from self.scrape_votes(session, bill, votes_url, committee_actors) + # votes_url = doc.xpath('//a[text()="Votes"]/@href')[0] + # yield from self.scrape_votes(session, bill, votes_url, committee_actors) def scrape_documents(self, bill, version_url): html = self.get(version_url).text