From 3ad19fd982c1e18b77abb9e370d2a38d7d60e588 Mon Sep 17 00:00:00 2001 From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:52:19 -0500 Subject: [PATCH] Deprecate spider chi_ssa_23 --- city_scrapers/spiders/chi_ssa_23.py | 162 ------- tests/files/chi_ssa_23.html | 640 ---------------------------- tests/test_chi_ssa_23.py | 152 ------- 3 files changed, 954 deletions(-) delete mode 100644 city_scrapers/spiders/chi_ssa_23.py delete mode 100644 tests/files/chi_ssa_23.html delete mode 100644 tests/test_chi_ssa_23.py diff --git a/city_scrapers/spiders/chi_ssa_23.py b/city_scrapers/spiders/chi_ssa_23.py deleted file mode 100644 index 5fcbad89b..000000000 --- a/city_scrapers/spiders/chi_ssa_23.py +++ /dev/null @@ -1,162 +0,0 @@ -import re -import unicodedata -from datetime import datetime, timedelta - -from city_scrapers_core.constants import COMMISSION -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -class ChiSsa23Spider(CityScrapersSpider): - name = "chi_ssa_23" - agency = "Chicago Special Service Area #23 Clark Street" - timezone = "America/Chicago" - start_urls = ["https://www.lincolnparkchamber.com/clark-street-ssa-administration/"] - location = { - "name": "Lincoln Park Chamber of Commerce", - "address": "2468 N. Lincoln Chicago, IL 60614", - } - # Each meeting takes place on Wednesday 4 PM - meeting_day = "Wednesday" - time = "4:00 pm" - - def parse(self, response): - address_text = response.xpath('//div[@class = "address"][1]/text()').extract()[ - 1 - ] - self._validate_location(address_text) - - h4s = response.xpath("//h4") - - # General meeting description is mentioned just after the H4 for current year - general_desc = h4s.xpath("following-sibling::p[1]//em//text()").extract_first() - - # Dictionary containing all meeting dictionaries - # The dates will be the keys - meetings = dict() - - last_year = datetime.today().replace(year=datetime.today().year - 1) - - for entry_cnt, entry in enumerate(h4s, start=1): - entry_str = entry.xpath("./text()").extract_first() - test_year = entry_str[0:4] - - if "Schedule" in entry_str: - for item in entry.xpath( - "following-sibling::ol[1]//li//text()" - ).getall(): - date, start, end = self._parse_date_start_end(item, test_year) - - meetings[date] = { - "start": start, - "end": end, - # Scheduled appointments have no links - "links": [], - } - - elif "Agendas" in entry_str or "Minutes" in entry_str: - # Only consider ps between two h4s - for p in entry.xpath( - "following-sibling::p[count(preceding-sibling::h4)=" "$entry_cnt]", - entry_cnt=entry_cnt, - ): - # The non-breaking space signals the end of the meeting lists - if ( - p.xpath("./text()") - and "\xa0" in p.xpath("./text()").extract_first() - ): - break - - for item in p.xpath("./a"): - item_str = item.xpath("./text()").extract_first() - date, start, end = self._parse_date_start_end( - item_str, test_year - ) - - item_links = item.xpath("@href").extract() - links = self._parse_links(item_links, entry_str) - - if date in meetings: - meetings[date]["links"].extend(links) - - else: - meetings[date] = { - "start": start, - "end": end, - "links": links, - } - - # Create the meeting objects - for key, item in meetings.items(): - if item["start"] < last_year and not self.settings.getbool( - "CITY_SCRAPERS_ARCHIVE" - ): - continue - - meeting = Meeting( - title="Commission", - description=unicodedata.normalize("NFKD", general_desc), - classification=COMMISSION, - start=item["start"], - end=item["end"], - time_notes="Estimated 90 minutes duration", - all_day=False, - location=self.location, - links=item["links"], - source=response.url, - ) - - meeting["status"] = self._get_status(meeting) - meeting["id"] = self._get_id(meeting) - yield meeting - - def _parse_date_start_end(self, item, year): - """ - Parse start date and time. - """ - # Check for explicit start times in the string - try: - start_time = re.search(r"\((.*?)\)", item).group(1) - - except AttributeError: - start_time = self.time - - # Split the month day string and make sure to drop the year before that - dm_str = item.split(",")[0].split() - - # Adding a 0 as padding for single-digit days - if len(dm_str[1]) < 2: - dm_str[1] = "0" + dm_str[1] - dt_str = dm_str[0] + " " + dm_str[1] - - start = datetime.strptime( - "{} {} {} {}".format( - self.meeting_day, re.sub(r"[,\.]", "", dt_str), start_time, year - ), - "%A %B %d %I:%M %p %Y", - ) - date = start.date() - end = start + timedelta(minutes=90) - - return date, start, end - - def _parse_links(self, items, entry_str): - documents = [] - for url in items: - if url: - documents.append(self._build_link_dict(url, entry_str)) - - return documents - - def _build_link_dict(self, url, entry_str): - if "agenda" in entry_str.lower(): - return {"href": url, "title": "Agenda"} - elif "minutes" in entry_str.lower(): - return {"href": url, "title": "Minutes"} - else: - return {"href": url, "title": "Link"} - - def _validate_location(self, text): - """Parse or generate location.""" - if "2468" not in text: - raise ValueError("Meeting location has changed") diff --git a/tests/files/chi_ssa_23.html b/tests/files/chi_ssa_23.html deleted file mode 100644 index 81870e1d5..000000000 --- a/tests/files/chi_ssa_23.html +++ /dev/null @@ -1,640 +0,0 @@ - - - - - - - Clark Street SSA Administration - Lincoln Park Chamber of Commerce - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-
-

Clark Street SSA Administration

-
-
-
- -
- -
-
-
-
-

2020 Clark Street SSA Commission Meeting Schedule

-

All meetings held Wednesdays at 4:00 p.m.  Meetings typically run 90 minutes. Please contact the LPCC to confirm meeting locations (773) 880-5200. 

-
    -
  1. February 5
  2. -
  3. March 24 (9:37 am)
  4. -
  5. April 3 (10:30 am)
  6. -
  7. April 22
  8. -
  9. May 27
  10. -
  11. July 8
  12. -
  13. September 9
  14. -
  15. November 18
  16. -
-

2020 Clark Street SSA Commission Meeting Agendas

-

February 5, 2020

-

March 24, 2020

-

April 3, 2020

-

April 22, 2020

-

2020 Clark Street SSA Commission Meeting Minutes

-

February 5, 2020

-

March 24, 2020

-

April 3, 2020

-

2019 Clark Street SSA Commission Meeting Agendas

-

January 23
-April 24
-May 15
-July 10
-September 4
-November 13

-

2019 Clark Street SSA Commission Meeting Minutes

-

January 23
-April 24
-May 15
-July 10
-September 4
-November 13

-
-

2018 Clark Street SSA Commission Meeting Agendas

-

February 7, 2018
-April 18, 2018
-May 23, 2018
-July 11, 2018
-September 5, 2018
-November 14, 2018

-

-

2018 Clark Street SSA Commission Meeting Minutes

-

February 7, 2018
-April 18, 2018
-May 23, 2018
-July 11, 2018
-September 5, 2018
-November 14, 2018

-
-

2017 Clark Street SSA Commission Meeting Agendas

-

September 27, 2017
-July 12, 2017
-May 24, 2017
-April 12, 2017
-January 25, 2017

-

-

2017 Clark Street SSA Commission Meeting Minutes

-

July 12, 2017
-May 24, 2017
-April 12, 2017
-January 25, 2017

-
-

2016 Clark Street SSA Commission Meeting Minutes

-

November 16, 2016
-September 21, 2016
-July 13, 2016
-April 20, 2017
-January 27, 2016

-

 

-

To view the 2019 Clark Street Special Service Area #23 Service Provider Agreement, click here.

- -
-
-
-
Current Request for Proposals
-

The Lincoln Park Chamber of Commerce is not currently accepting bids for services.

- -
-
-
-
Clark Street SSA #23 Service Area Boundaries
-

- -
-
-
- -
- -
- -
- -
- - - - -
- -
-
-
-
- -
-
-

Stay in touch

-

Trying to keep up with everything that's happening in your Lincoln Park neighborhood? Subscribe today for email updates based on your interests.

- -
-
- -
-
-
- -
-
-
- - -
-
- loading... - - - -
-
- - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/tests/test_chi_ssa_23.py b/tests/test_chi_ssa_23.py deleted file mode 100644 index 7d855948e..000000000 --- a/tests/test_chi_ssa_23.py +++ /dev/null @@ -1,152 +0,0 @@ -from datetime import datetime -from os.path import dirname, join - -import pytest -from city_scrapers_core.constants import COMMISSION, PASSED, TENTATIVE -from city_scrapers_core.utils import file_response -from freezegun import freeze_time -from scrapy.settings import Settings - -from city_scrapers.spiders.chi_ssa_23 import ChiSsa23Spider - -test_response = file_response( - join(dirname(__file__), "files", "chi_ssa_23.html"), - url="https://www.lincolnparkchamber.com/clark-street-ssa-administration/", -) - -spider = ChiSsa23Spider() -spider.settings = Settings(values={"CITY_SCRAPERS_ARCHIVE": False}) - -freezer = freeze_time("2020-05-11") -freezer.start() - -parsed_items = sorted( - [item for item in spider.parse(test_response)], - key=lambda i: i["start"], - reverse=True, -) - -freezer.stop() - - -def test_count(): - assert len(parsed_items) == 12 - - -@pytest.mark.parametrize("item", parsed_items) -def test_title(item): - assert item["title"] == "Commission" - - -@pytest.mark.parametrize("item", parsed_items) -def test_description(item): - assert ( - item["description"] == "All meetings held Wednesdays at 4:00 p.m. " - "Meetings typically run 90 minute" - "s. Please contact the LPCC to confirm meeting " - "locations (773) 880-5200. " - ) - - -def test_start(): - expected_starts = [ - datetime(2020, 11, 18, 16, 0), - datetime(2020, 9, 9, 16, 0), - datetime(2020, 7, 8, 16, 0), - datetime(2020, 5, 27, 16, 0), - datetime(2020, 4, 22, 16, 0), - datetime(2020, 4, 3, 10, 30), - datetime(2020, 3, 24, 9, 37), - datetime(2020, 2, 5, 16, 0), - datetime(2019, 11, 13, 16, 0), - datetime(2019, 9, 4, 16, 0), - datetime(2019, 7, 10, 16, 0), - datetime(2019, 5, 15, 16, 0), - ] - for i in range(len(parsed_items)): - assert parsed_items[i]["start"] == expected_starts[i] - - -def test_end(): - expected_ends = [ - datetime(2020, 11, 18, 17, 30), - datetime(2020, 9, 9, 17, 30), - datetime(2020, 7, 8, 17, 30), - datetime(2020, 5, 27, 17, 30), - datetime(2020, 4, 22, 17, 30), - datetime(2020, 4, 3, 12, 00), - datetime(2020, 3, 24, 11, 7), - datetime(2020, 2, 5, 17, 30), - datetime(2019, 11, 13, 17, 30), - datetime(2019, 9, 4, 17, 30), - datetime(2019, 7, 10, 17, 30), - datetime(2019, 5, 15, 17, 30), - ] - for i in range(len(parsed_items)): - assert parsed_items[i]["end"] == expected_ends[i] - - -@pytest.mark.parametrize("item", parsed_items) -def test_time_notes(item): - assert item["time_notes"] == "Estimated 90 minutes duration" - - -def test_id(): - expected_ids = [ - "chi_ssa_23/202011181600/x/commission", - "chi_ssa_23/202009091600/x/commission", - "chi_ssa_23/202007081600/x/commission", - "chi_ssa_23/202005271600/x/commission", - "chi_ssa_23/202004221600/x/commission", - "chi_ssa_23/202004031030/x/commission", - "chi_ssa_23/202003240937/x/commission", - "chi_ssa_23/202002051600/x/commission", - "chi_ssa_23/201911131600/x/commission", - "chi_ssa_23/201909041600/x/commission", - "chi_ssa_23/201907101600/x/commission", - "chi_ssa_23/201905151600/x/commission", - ] - for i in range(len(parsed_items)): - assert parsed_items[i]["id"] == expected_ids[i] - - -def test_status(): - expected_status = [ - TENTATIVE, - TENTATIVE, - TENTATIVE, - TENTATIVE, - PASSED, - PASSED, - PASSED, - PASSED, - PASSED, - PASSED, - PASSED, - PASSED, - ] - for i in range(len(parsed_items)): - assert parsed_items[i]["status"] == expected_status[i] - - -@pytest.mark.parametrize("item", parsed_items) -def test_location(item): - assert item["location"] == { - "name": "Lincoln Park Chamber of Commerce", - "address": "2468 N. Lincoln Chicago, IL 60614", - } - - -@pytest.mark.parametrize("item", parsed_items) -def test_source(item): - assert item["source"] == test_response.url - - -@pytest.mark.parametrize("item", parsed_items) -def test_classification(item): - assert item["classification"] == COMMISSION - - -def test_all_day(): - for i in range(len(parsed_items)): - assert parsed_items[i]["all_day"] is False