From e79b3b9a902bff3f22c86b7c9854303cbc78a935 Mon Sep 17 00:00:00 2001
From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 15:52:15 -0500
Subject: [PATCH] Deprecate spider cook_pharmaceutical_disposal
---
.../spiders/cook_pharmaceutical_disposal.py | 112 ------
tests/files/cook_pharmaceutical_disposal.html | 380 ------------------
tests/test_cook_pharmaceutical_disposal.py | 92 -----
3 files changed, 584 deletions(-)
delete mode 100644 city_scrapers/spiders/cook_pharmaceutical_disposal.py
delete mode 100644 tests/files/cook_pharmaceutical_disposal.html
delete mode 100644 tests/test_cook_pharmaceutical_disposal.py
diff --git a/city_scrapers/spiders/cook_pharmaceutical_disposal.py b/city_scrapers/spiders/cook_pharmaceutical_disposal.py
deleted file mode 100644
index 208f659f1..000000000
--- a/city_scrapers/spiders/cook_pharmaceutical_disposal.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from datetime import datetime, time
-
-from city_scrapers_core.constants import ADVISORY_COMMITTEE
-from city_scrapers_core.items import Meeting
-from city_scrapers_core.spiders import CityScrapersSpider
-
-
-class CookPharmaceuticalDisposalSpider(CityScrapersSpider):
- name = "cook_pharmaceutical_disposal"
- agency = "Cook County Pharmaceutical Disposal Advisory Committee"
- timezone = "America/Chicago"
- start_urls = ["https://www.cookcountysheriff.org/rx/advisory-committee/"]
-
- def parse(self, response):
- """
- `parse` should always `yield` Meeting items.
- """
- # Scraping past meetings
- for item in response.xpath("//div[@class='col-sm-12 ']/p"):
- self._ignore = False
- start = self._parse_start(item)
- if not start:
- continue
- meeting = Meeting(
- title=self._parse_title(item),
- description=self._parse_description(item),
- classification=self._parse_classification(item),
- start=start,
- end=self._parse_end(item),
- all_day=self._parse_all_day(item),
- time_notes=self._parse_time_notes(item),
- location=self._parse_location(item),
- links=self._parse_links(item, response),
- source=self._parse_source(response),
- )
-
- meeting["status"] = self._get_status(meeting)
- meeting["id"] = self._get_id(meeting)
-
- if self._ignore is False:
- yield meeting
-
- def _parse_title(self, item):
- """Parse or generate meeting title."""
- return "Safe Disposal of Pharmaceuticals Advisory Committee"
-
- def _parse_description(self, item):
- """Parse or generate meeting description."""
- return ""
-
- def _parse_classification(self, item):
- """Parse or generate classification from allowed options."""
- return ADVISORY_COMMITTEE
-
- def _parse_start(self, item):
- """Parse start datetime as a naive datetime object."""
- # Dates are not correct for some meetings as they
- # are not provided in the URL
- # Dates format is not consistent in the URLs
- report_links = item.xpath(".//a/@href")
- if len(report_links) == 0:
- return
- report_obj = report_links[0]
- report_desc = report_obj.get().split("/")[-1].split("-")
- if report_desc[2].isdigit():
- date_str = report_desc[0][:-1] + report_desc[1] + report_desc[2]
- date_obj = datetime.strptime(date_str, "%b%d%Y").date()
- elif report_desc[-3].isdigit():
- date_str = report_desc[-3] + report_desc[-2] + report_desc[-1][:-4]
- date_obj = datetime.strptime(date_str, "%m%d%y").date()
- else:
- date_str = report_desc[-2] + report_desc[-1][:-4]
- date_obj = datetime.strptime(date_str, "%m%y").date()
- # Dates cannot be retrieved, ignore meeting
- self._ignore = True
- return datetime.combine(date_obj, time(13))
-
- def _parse_end(self, item):
- """Parse end datetime as a naive datetime object. Added by pipeline if None"""
- return None
-
- def _parse_time_notes(self, item):
- """Parse any additional notes on the timing of the meeting"""
- return "See agenda to confirm exact times"
-
- def _parse_all_day(self, item):
- """Parse or generate all-day status. Defaults to False."""
- return False
-
- def _parse_location(self, item):
- """Parse or generate location."""
- return {
- "address": "50 W Washington St, Room 407, Chicago, IL 60602",
- "name": "Daley Center",
- }
-
- def _parse_links(self, item, response):
- """Parse or generate links."""
- links = list()
- for report_obj in item.xpath(".//a/@href"):
- report_desc = report_obj.get().split("/")[-1]
- links.append(
- {
- "href": response.urljoin(report_obj.get()),
- "title": report_desc[:-4].replace("-", " "),
- }
- )
- return links
-
- def _parse_source(self, response):
- """Parse or generate source."""
- return response.url
diff --git a/tests/files/cook_pharmaceutical_disposal.html b/tests/files/cook_pharmaceutical_disposal.html
deleted file mode 100644
index e3966d376..000000000
--- a/tests/files/cook_pharmaceutical_disposal.html
+++ /dev/null
@@ -1,380 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Advisory Committee > Cook County Sheriffs
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Navigation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/tests/test_cook_pharmaceutical_disposal.py b/tests/test_cook_pharmaceutical_disposal.py
deleted file mode 100644
index c430a0004..000000000
--- a/tests/test_cook_pharmaceutical_disposal.py
+++ /dev/null
@@ -1,92 +0,0 @@
-from datetime import datetime
-from os.path import dirname, join
-
-import pytest
-from city_scrapers_core.constants import ADVISORY_COMMITTEE, PASSED
-from city_scrapers_core.utils import file_response
-from freezegun import freeze_time
-
-from city_scrapers.spiders.cook_pharmaceutical_disposal import (
- CookPharmaceuticalDisposalSpider,
-)
-
-test_response = file_response(
- join(dirname(__file__), "files", "cook_pharmaceutical_disposal.html"),
- url="https://www.cookcountysheriff.org/rx/advisory-committee/",
-)
-spider = CookPharmaceuticalDisposalSpider()
-
-freezer = freeze_time("2020-10-06")
-freezer.start()
-
-parsed_items = [item for item in spider.parse(test_response)]
-
-freezer.stop()
-
-
-def test_title():
- assert (
- parsed_items[0]["title"]
- == "Safe Disposal of Pharmaceuticals Advisory Committee"
- )
-
-
-def test_description():
- assert parsed_items[0]["description"] == ""
-
-
-def test_start():
- assert parsed_items[0]["start"] == datetime(2019, 12, 10, 13, 0)
-
-
-def test_end():
- assert parsed_items[0]["end"] is None
-
-
-def test_time_notes():
- assert parsed_items[0]["time_notes"] == "See agenda to confirm exact times"
-
-
-def test_id():
- assert (
- parsed_items[0]["id"]
- == "cook_pharmaceutical_disposal/201912101300/x/"
- + "safe_disposal_of_pharmaceuticals_advisory_committee"
- )
-
-
-def test_status():
- assert parsed_items[0]["status"] == PASSED
-
-
-def test_location():
- assert parsed_items[0]["location"] == {
- "address": "50 W Washington St, Room 407, Chicago, IL 60602",
- "name": "Daley Center",
- }
-
-
-def test_source():
- assert (
- parsed_items[0]["source"]
- == "https://www.cookcountysheriff.org/rx/advisory-committee/"
- )
-
-
-def test_links():
- assert parsed_items[0]["links"] == [
- {
- "href": "https://www.cookcountysheriff.org/wp-content/uploads/"
- + "2019/11/Dec.-10-2019-Advisory-Committee-Meeting-Agenda.pdf",
- "title": "Dec. 10 2019 Advisory Committee Meeting Agenda",
- }
- ]
-
-
-def test_classification():
- assert parsed_items[0]["classification"] == ADVISORY_COMMITTEE
-
-
-@pytest.mark.parametrize("item", parsed_items)
-def test_all_day(item):
- assert item["all_day"] is False
- - -