From e79b3b9a902bff3f22c86b7c9854303cbc78a935 Mon Sep 17 00:00:00 2001 From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:52:15 -0500 Subject: [PATCH] Deprecate spider cook_pharmaceutical_disposal --- .../spiders/cook_pharmaceutical_disposal.py | 112 ------ tests/files/cook_pharmaceutical_disposal.html | 380 ------------------ tests/test_cook_pharmaceutical_disposal.py | 92 ----- 3 files changed, 584 deletions(-) delete mode 100644 city_scrapers/spiders/cook_pharmaceutical_disposal.py delete mode 100644 tests/files/cook_pharmaceutical_disposal.html delete mode 100644 tests/test_cook_pharmaceutical_disposal.py diff --git a/city_scrapers/spiders/cook_pharmaceutical_disposal.py b/city_scrapers/spiders/cook_pharmaceutical_disposal.py deleted file mode 100644 index 208f659f1..000000000 --- a/city_scrapers/spiders/cook_pharmaceutical_disposal.py +++ /dev/null @@ -1,112 +0,0 @@ -from datetime import datetime, time - -from city_scrapers_core.constants import ADVISORY_COMMITTEE -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -class CookPharmaceuticalDisposalSpider(CityScrapersSpider): - name = "cook_pharmaceutical_disposal" - agency = "Cook County Pharmaceutical Disposal Advisory Committee" - timezone = "America/Chicago" - start_urls = ["https://www.cookcountysheriff.org/rx/advisory-committee/"] - - def parse(self, response): - """ - `parse` should always `yield` Meeting items. - """ - # Scraping past meetings - for item in response.xpath("//div[@class='col-sm-12 ']/p"): - self._ignore = False - start = self._parse_start(item) - if not start: - continue - meeting = Meeting( - title=self._parse_title(item), - description=self._parse_description(item), - classification=self._parse_classification(item), - start=start, - end=self._parse_end(item), - all_day=self._parse_all_day(item), - time_notes=self._parse_time_notes(item), - location=self._parse_location(item), - links=self._parse_links(item, response), - source=self._parse_source(response), - ) - - meeting["status"] = self._get_status(meeting) - meeting["id"] = self._get_id(meeting) - - if self._ignore is False: - yield meeting - - def _parse_title(self, item): - """Parse or generate meeting title.""" - return "Safe Disposal of Pharmaceuticals Advisory Committee" - - def _parse_description(self, item): - """Parse or generate meeting description.""" - return "" - - def _parse_classification(self, item): - """Parse or generate classification from allowed options.""" - return ADVISORY_COMMITTEE - - def _parse_start(self, item): - """Parse start datetime as a naive datetime object.""" - # Dates are not correct for some meetings as they - # are not provided in the URL - # Dates format is not consistent in the URLs - report_links = item.xpath(".//a/@href") - if len(report_links) == 0: - return - report_obj = report_links[0] - report_desc = report_obj.get().split("/")[-1].split("-") - if report_desc[2].isdigit(): - date_str = report_desc[0][:-1] + report_desc[1] + report_desc[2] - date_obj = datetime.strptime(date_str, "%b%d%Y").date() - elif report_desc[-3].isdigit(): - date_str = report_desc[-3] + report_desc[-2] + report_desc[-1][:-4] - date_obj = datetime.strptime(date_str, "%m%d%y").date() - else: - date_str = report_desc[-2] + report_desc[-1][:-4] - date_obj = datetime.strptime(date_str, "%m%y").date() - # Dates cannot be retrieved, ignore meeting - self._ignore = True - return datetime.combine(date_obj, time(13)) - - def _parse_end(self, item): - """Parse end datetime as a naive datetime object. Added by pipeline if None""" - return None - - def _parse_time_notes(self, item): - """Parse any additional notes on the timing of the meeting""" - return "See agenda to confirm exact times" - - def _parse_all_day(self, item): - """Parse or generate all-day status. Defaults to False.""" - return False - - def _parse_location(self, item): - """Parse or generate location.""" - return { - "address": "50 W Washington St, Room 407, Chicago, IL 60602", - "name": "Daley Center", - } - - def _parse_links(self, item, response): - """Parse or generate links.""" - links = list() - for report_obj in item.xpath(".//a/@href"): - report_desc = report_obj.get().split("/")[-1] - links.append( - { - "href": response.urljoin(report_obj.get()), - "title": report_desc[:-4].replace("-", " "), - } - ) - return links - - def _parse_source(self, response): - """Parse or generate source.""" - return response.url diff --git a/tests/files/cook_pharmaceutical_disposal.html b/tests/files/cook_pharmaceutical_disposal.html deleted file mode 100644 index e3966d376..000000000 --- a/tests/files/cook_pharmaceutical_disposal.html +++ /dev/null @@ -1,380 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - Advisory Committee > Cook County Sheriffs - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-Skip to main content - - - - - - -
-
-
- - - - -
-
- -
- - - - - - - - - - -
- -

Search

-
-

Recent Press Releases

- -
-
-
- - - -
- - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/tests/test_cook_pharmaceutical_disposal.py b/tests/test_cook_pharmaceutical_disposal.py deleted file mode 100644 index c430a0004..000000000 --- a/tests/test_cook_pharmaceutical_disposal.py +++ /dev/null @@ -1,92 +0,0 @@ -from datetime import datetime -from os.path import dirname, join - -import pytest -from city_scrapers_core.constants import ADVISORY_COMMITTEE, PASSED -from city_scrapers_core.utils import file_response -from freezegun import freeze_time - -from city_scrapers.spiders.cook_pharmaceutical_disposal import ( - CookPharmaceuticalDisposalSpider, -) - -test_response = file_response( - join(dirname(__file__), "files", "cook_pharmaceutical_disposal.html"), - url="https://www.cookcountysheriff.org/rx/advisory-committee/", -) -spider = CookPharmaceuticalDisposalSpider() - -freezer = freeze_time("2020-10-06") -freezer.start() - -parsed_items = [item for item in spider.parse(test_response)] - -freezer.stop() - - -def test_title(): - assert ( - parsed_items[0]["title"] - == "Safe Disposal of Pharmaceuticals Advisory Committee" - ) - - -def test_description(): - assert parsed_items[0]["description"] == "" - - -def test_start(): - assert parsed_items[0]["start"] == datetime(2019, 12, 10, 13, 0) - - -def test_end(): - assert parsed_items[0]["end"] is None - - -def test_time_notes(): - assert parsed_items[0]["time_notes"] == "See agenda to confirm exact times" - - -def test_id(): - assert ( - parsed_items[0]["id"] - == "cook_pharmaceutical_disposal/201912101300/x/" - + "safe_disposal_of_pharmaceuticals_advisory_committee" - ) - - -def test_status(): - assert parsed_items[0]["status"] == PASSED - - -def test_location(): - assert parsed_items[0]["location"] == { - "address": "50 W Washington St, Room 407, Chicago, IL 60602", - "name": "Daley Center", - } - - -def test_source(): - assert ( - parsed_items[0]["source"] - == "https://www.cookcountysheriff.org/rx/advisory-committee/" - ) - - -def test_links(): - assert parsed_items[0]["links"] == [ - { - "href": "https://www.cookcountysheriff.org/wp-content/uploads/" - + "2019/11/Dec.-10-2019-Advisory-Committee-Meeting-Agenda.pdf", - "title": "Dec. 10 2019 Advisory Committee Meeting Agenda", - } - ] - - -def test_classification(): - assert parsed_items[0]["classification"] == ADVISORY_COMMITTEE - - -@pytest.mark.parametrize("item", parsed_items) -def test_all_day(item): - assert item["all_day"] is False