From ad99e4de48874a9423a9eefecf0be95a6258b3f7 Mon Sep 17 00:00:00 2001 From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:59:43 -0500 Subject: [PATCH] Deprecate spider chi_ssa_29 --- city_scrapers/spiders/chi_ssa_29.py | 114 ------- tests/files/chi_ssa_29.html | 459 ---------------------------- tests/test_chi_ssa_29.py | 85 ------ 3 files changed, 658 deletions(-) delete mode 100644 city_scrapers/spiders/chi_ssa_29.py delete mode 100644 tests/files/chi_ssa_29.html delete mode 100644 tests/test_chi_ssa_29.py diff --git a/city_scrapers/spiders/chi_ssa_29.py b/city_scrapers/spiders/chi_ssa_29.py deleted file mode 100644 index e0e4d5268..000000000 --- a/city_scrapers/spiders/chi_ssa_29.py +++ /dev/null @@ -1,114 +0,0 @@ -import re -from datetime import datetime, time - -from city_scrapers_core.constants import COMMISSION -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -class ChiSsa29Spider(CityScrapersSpider): - name = "chi_ssa_29" - agency = "Chicago Special Service Area #29 2014 West Town" - timezone = "America/Chicago" - start_urls = ["http://www.westtownssa.org/transparency/"] - location = { - "name": "West Town Chamber of Commerce", - "address": "1819 W Chicago Ave Chicago, IL 60622", - } - - def parse(self, response): - """ - `parse` should always `yield` Meeting items. - - Change the `_parse_title`, `_parse_start`, etc methods to fit your scraping - needs. - """ - self._validate_location(response) - meeting_dicts = {} - # Iterate through current meetings, creating dicts of variable values - for item in response.css(".content_block:first-child div > div > div"): - meeting_str = item.xpath("./text()").extract_first() or "" - start = self._parse_start(meeting_str) - if not start: - continue - title = self._parse_title(meeting_str) - meeting_dicts[start.date()] = { - "start": start, - "title": title, - } - # Iterate through minutes, adding links to existing meetings or create new ones - for item in response.css(".content_attachments a"): - meeting_str = item.css(".pdf_icon::text").extract_first() or "" - start = self._parse_start(meeting_str) - if not start: - continue - title = self._parse_title(meeting_str) - links = [{"title": "Minutes", "href": item.attrib["href"]}] - if start.date() in meeting_dicts: - meeting_dicts[start.date()]["links"] = links - else: - meeting_dicts[start.date()] = { - "title": title, - "start": start, - "links": links, - } - for item in meeting_dicts.values(): - meeting = Meeting( - title=item["title"], - description="", - classification=COMMISSION, - start=item["start"], - end=None, - all_day=False, - time_notes="", - location=self.location, - links=item.get("links", []), - source=response.url, - ) - - meeting["status"] = self._get_status(meeting) - meeting["id"] = self._get_id(meeting) - - yield meeting - - def _parse_title(self, meeting_str): - """Parse or generate meeting title.""" - special_match = re.search(r"(?<=\().*Meeting(?=\))", meeting_str) - if special_match: - return special_match.group().strip() - return "Commission" - - def _parse_start(self, meeting_str): - """Parse start datetime as a naive datetime object.""" - date_match = re.search( - r"[a-zA-Z]{3,10} \d{1,2}([a-z]{2})?,? \d{4}", meeting_str - ) - if date_match: - date_str = re.sub(r"(,|(?<=\d)[a-z]{2}(?=[, ]))", "", date_match.group()) - date_obj = datetime.strptime(date_str, "%B %d %Y").date() - else: - date_match = re.search( - r"^\d{2}\.\d{2}\.\d{2,4}(?= Minutes$)", meeting_str.strip() - ) - # Return early if match not found here - if not date_match: - return - date_str = date_match.group() - dt_fmt = "%m.%d.%Y" if len(date_str) == 10 else "%m.%d.%y" - date_obj = datetime.strptime(date_str, dt_fmt) - time_match = re.search(r"\d{1,2}(:\d{2})? ?[apm\.]{2,4}", meeting_str) - if time_match: - time_str = re.sub(r"[\s\.]", "", time_match.group()) - time_fmt = "%I:%M%p" if ":" in time_str else "%I%p" - time_obj = datetime.strptime(time_str, time_fmt).time() - else: - time_obj = time(11) - return datetime.combine(date_obj, time_obj) - - def _validate_location(self, response): - """Verify that location hasn't changed""" - if ( - "1819 w" - not in " ".join(response.css(".events_block div::text").extract()).lower() - ): - raise ValueError("Meeting location has changed") diff --git a/tests/files/chi_ssa_29.html b/tests/files/chi_ssa_29.html deleted file mode 100644 index fd9815752..000000000 --- a/tests/files/chi_ssa_29.html +++ /dev/null @@ -1,459 +0,0 @@ - - - - - West town SSA - - - - - - - - - - - - -

I

- -
Main background
-
-
-
-
- -
- - -
-
-
-
-
-
-

SSA Commission Meetings

- - - - -
-
-

SSA Commission Meetings

-
- All regularly scheduled SSA Commission meetings are held at 11am on the first Thursday of each month at the West Town Chicago Chamber of Commerce, located at 1819 W. Chicago Ave., Chicago, IL 60622, unless notified of the changes below.

All approved meeting minutes are posted below.

2019 Schedule


February 7th, 2019
March 7th, 2019
April 4th, 2019
April 29, 2019  at 2pm (Special Audit Review Meeting)
May 2nd, 2019
June 6th, 2019
July 11th, 2019 (meeting is on the 2nd Thursday due to the 4th of July Holiday)
August 1st, 2019
September 5th, 2019
October 3rd, 2019
November 7th, 2019
December 5th, 2019

- -
-
- - - - - - - - - - - - - - - - - - - -
-
-
-
-

Yearly Audits

- - - - - - - -
- -
- - - - - - - - - - - - - - - - -
-
-
-
-

Request For Proposals

- - - - - - - - - - -
-
-

RFP's

-
-
Please check back regularly, as we will be posting all RFP's here.
- -
-
- - - - - - - - - - - - - -
-
-
-
-

SSA Budget

- - - - - - - - - - - - - -
- -
- - - - - - - - - - -
-
-
-
-

SSA Commission Meeting Minutes

- - - - - - - - - - - - - - - - -
- -
- - - -
- -
- - - - - - -
-
-
-
-

SSA Reconstitution & Expansion

- - - - - - - - - - - - - - - - - - - - -
-
-

SSA Reconstitution & Expansion

-
-
The transcripts from the public hearing regarding the SSA Reconstitution will be made available on the City Clerk's website at an unknown date: https://chicago.legistar.com/Calendar.aspx 

The West Town SSA#29 expired on December 31, 2014. The West Town SSA has officially been reconstituted and it's new number is now SSA 29-2014. 

Over a 2 year span (2013 & 2014) we attended Neighborhood Group Meetings, hosted 5 Community Meetings, and had 2 Public Hearings at City Hall. We received over 20% of signatures from property owners that were in favor of the SSA, and a resounding show of support at both our Public Hearings. 

Because funding for SSA29-2014 will arrive after the second installment of the 2014 property tax bill, we will not be able to start services in the new district until Fall 2015. Services in the expired SSA29 district will continue until we are able to start services in the new district. 

If you have any questions, comments or concerns regarding the West Town SSA Reconstitution & Expansion, please feel free to call or e-mail:
Katharine (Kace) Wakem
West Town Chamber of Commerce
312-850-9390
SSA@WestTownChamber.org

All West Town SSA Reconstitution & Expansion supporting documentation is here. 


- - - - - - - - - - - - - - - - - - - - - - - -
-
- - - -
-
-
-
-
-
- - -
- - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/tests/test_chi_ssa_29.py b/tests/test_chi_ssa_29.py deleted file mode 100644 index b984d0e26..000000000 --- a/tests/test_chi_ssa_29.py +++ /dev/null @@ -1,85 +0,0 @@ -from datetime import datetime -from operator import itemgetter -from os.path import dirname, join - -import pytest # noqa -from city_scrapers_core.constants import COMMISSION, PASSED -from city_scrapers_core.utils import file_response -from freezegun import freeze_time - -from city_scrapers.spiders.chi_ssa_29 import ChiSsa29Spider - -test_response = file_response( - join(dirname(__file__), "files", "chi_ssa_29.html"), - url="http://www.westtownssa.org/transparency/", -) -spider = ChiSsa29Spider() - -freezer = freeze_time("2019-07-02") -freezer.start() - -parsed_items = sorted( - [item for item in spider.parse(test_response)], key=itemgetter("start") -) - -freezer.stop() - - -def test_count(): - assert len(parsed_items) == 23 - - -def test_title(): - assert parsed_items[0]["title"] == "Commission" - assert parsed_items[14]["title"] == "Special Audit Review Meeting" - - -def test_description(): - assert parsed_items[0]["description"] == "" - - -def test_start(): - assert parsed_items[0]["start"] == datetime(2018, 1, 4, 11, 0) - assert parsed_items[14]["start"] == datetime(2019, 4, 29, 14) - - -def test_end(): - assert parsed_items[0]["end"] is None - - -def test_time_notes(): - assert parsed_items[0]["time_notes"] == "" - - -def test_id(): - assert parsed_items[0]["id"] == "chi_ssa_29/201801041100/x/commission" - - -def test_status(): - assert parsed_items[0]["status"] == PASSED - - -def test_location(): - assert parsed_items[0]["location"] == spider.location - - -def test_source(): - assert parsed_items[0]["source"] == "http://www.westtownssa.org/transparency/" - - -def test_links(): - assert parsed_items[0]["links"] == [ - { - "title": "Minutes", - "href": "http://www.westtownssa.org/content/directory/attachments/events/e/elsrmc/1.4.18 Minutes.pdf", # noqa - } - ] - assert parsed_items[-1]["links"] == [] - - -def test_classification(): - assert parsed_items[0]["classification"] == COMMISSION - - -def test_all_day(): - assert parsed_items[0]["all_day"] is False