diff --git a/city_scrapers/spiders/il_port_district.py b/city_scrapers/spiders/il_port_district.py deleted file mode 100644 index b5c060bd5..000000000 --- a/city_scrapers/spiders/il_port_district.py +++ /dev/null @@ -1,206 +0,0 @@ -import re -from collections import defaultdict -from datetime import datetime - -import scrapy -from city_scrapers_core.constants import BOARD, COMMITTEE, NOT_CLASSIFIED -from city_scrapers_core.items import Meeting -from city_scrapers_core.spiders import CityScrapersSpider - - -class IlPortDistrictSpider(CityScrapersSpider): - name = "il_port_district" - agency = "Illinois International Port District" - timezone = "America/Chicago" - allowed_domains = ["www.iipd.com"] - location = { - "name": "Illinois International Port District ", - "address": "3600 E. 95th St. Chicago, IL 60617", - } - - schedules_url = "https://www.iipd.com/calendar/schedules" - - def start_requests(self): - yield scrapy.Request( - url="https://www.iipd.com/calendar/agendas", callback=self.parse_agendas - ) - yield scrapy.Request( - url="https://www.iipd.com/about/board-meeting-minutes", - callback=self.parse_minutes, - ) - - @classmethod - def from_crawler(cls, crawler, *args, **kwargs): - spider = super().from_crawler(crawler, *args, **kwargs) - crawler.signals.connect(spider.spider_idle, signal=scrapy.signals.spider_idle) - return spider - - def spider_idle(self): - """ - Call parse_schedules if spider is idle (finished parsing minutes and agendas) - """ - self.crawler.signals.disconnect( - self.spider_idle, signal=scrapy.signals.spider_idle - ) - self.crawler.engine.crawl( - scrapy.Request(self.schedules_url, callback=self.parse_schedules), self - ) - raise scrapy.exceptions.DontCloseSpider - - def parse_schedules(self, response): - year = response.css(".rtecenter em::text").extract_first()[:4] - - rows = response.xpath("//tr") - meeting_types = rows[0].xpath(".//strong/text()").extract() - meeting_types = [x.strip(" :s") for x in meeting_types] - - strong_meetings = rows.xpath(".//strong/text()")[len(meeting_types) :].extract() - if len(strong_meetings) % 2 != 0: - strong_meetings.append("") - strong_meetings = list(zip(strong_meetings[0::2], strong_meetings[1::2])) - - additional_info = response.xpath("//p[contains(text(), '*')]/text()").extract() - changed_meeting_time = None - changed_time_matches = re.findall(r"\d{1,2}:\d{2}am|pm", additional_info[2]) - if len(changed_time_matches): - changed_meeting_time = changed_time_matches[0] - - self._validate_location(response) - - for i, row in enumerate(strong_meetings + rows[1:]): - if i >= len(strong_meetings): - meetings_dates = row.xpath(".//div/text()").extract() - if not meetings_dates: - continue - else: - meetings_dates = row - - for i, date in enumerate(meetings_dates): - if not date: - continue - - start = self._parse_start(date, year, changed_meeting_time) - - title = self._parse_title(date, meeting_types[i]) - - classification = self._parse_classification(i, meeting_types[i]) - - agendas_links = self.agendas_dict.get( - (classification, start.strftime("%B %Y")), [] - ) + self.agendas_dict.get( - (classification, start.strftime("%-m-%y")), [] - ) - - minutes_links = [] - - if classification == BOARD: - minutes_links = self.minutes_dict.get(start.date(), []) - - links = agendas_links + minutes_links - - meeting = Meeting( - title=title, - description="", - classification=classification, - start=start, - end=None, - all_day=False, - time_notes="", - location=self.location, - links=links, - source=response.url, - ) - - meeting["status"] = self._get_status(meeting) - meeting["id"] = self._get_id(meeting) - - yield meeting - - def parse_agendas(self, response): - file_names = response.xpath( - "//tr/td[@class='views-field views-field-title']/text()" - ).extract() - file_names = [x.strip("\n ") for x in file_names] - file_links = response.xpath("//tr/td/a[@class='file-download']/@href").extract() - agenda_file_groups = [] - for idx, file_link in enumerate(file_links): - clean_link = file_link.replace("%20", " ") - # Check two possible URL formats for agenda date patterns - agenda_date_match_1 = re.search(r"([\d\-]+).*?(?=.pdf)", clean_link) - agenda_date_match_2 = re.search(r"(?<=Agenda)(.*?)(?=.pdf)", clean_link) - if agenda_date_match_1: - agenda_file_groups.append( - (file_link, file_names[idx], agenda_date_match_1.group(1)) - ) - if agenda_date_match_2: - agenda_file_groups.append( - (file_link, file_names[idx], agenda_date_match_2.group().strip()) - ) - self.agendas_dict = defaultdict(list) - - for link, name, agenda_date in agenda_file_groups: - classification = BOARD if BOARD in name else COMMITTEE - self.agendas_dict[(classification, agenda_date.strip())].append( - {"title": " ".join([name, agenda_date]), "href": link} - ) - - def parse_minutes(self, response): - rows = response.xpath("//tr") - self.minutes_dict = {} - for row in rows: - file_name = row.xpath( - ".//td[@class='views-field views-field-title']/text()" - ).extract_first() - if not file_name: - continue - - file_name = file_name.strip("\n ") - file_name_dt = re.findall(r"(?:.*?)(?:\d{4})", file_name)[0] - file_name_dt = datetime.strptime(file_name_dt, "%B %d, %Y") - - file_link = row.xpath( - ".//td[@class='views-field views-field-field-file']/a/@href" - ).extract_first() - - self.minutes_dict.setdefault( - file_name_dt.date(), - [{"title": "Board Meeting Minutes", "href": file_link}], - ) - - def _parse_classification(self, i, meeting_types): - if BOARD in meeting_types: - return BOARD - elif COMMITTEE in meeting_types: - return COMMITTEE - else: - return NOT_CLASSIFIED - - def _parse_start(self, date, year, changed_meeting_time): - meeting_time = "9:00am" - - if date.startswith("***") or date.endswith("***"): - meeting_time = changed_meeting_time - elif date.startswith("*") or date.endswith("*"): - new_date = re.findall(r"(?<=\()(.*?)(?=NEW)", date) - date = new_date[0][:-3] if new_date else date - - date = date.strip(" *") - dt = " ".join([year, date, meeting_time]) - dt = datetime.strptime(dt, "%Y %B %d %I:%M%p") - - return dt - - def _parse_title(self, date, meeting_type): - if (date.startswith("**") or date.endswith("**")) and not ( - date.startswith("***") or date.endswith("***") - ): - return "Special " + meeting_type - else: - return meeting_type - - def _validate_location(self, response): - loc = response.xpath("//strong")[-1].xpath(".//text()").extract() - loc = [x.strip("\n ") for x in loc] - loc = " ".join(loc[-2:]) - if "3600" not in loc: - raise ValueError("Meeting location has changed") diff --git a/tests/files/il_port_district_agendas.html b/tests/files/il_port_district_agendas.html deleted file mode 100644 index 5190105d9..000000000 --- a/tests/files/il_port_district_agendas.html +++ /dev/null @@ -1,219 +0,0 @@ - - - - - - -Agendas | IIPD - - - - - - - - - - - - - - -

Skip to content »

- - - -
- IIPD -
- -
-
-
-
- -
-
- -

- Agendas

- - - - - -
-
- - -
-
- - - - -
-
- - - -
-
-
-
- - -
-
-
-

Current Board Meeting Agenda

-

The Illinois International Port District Board meets monthly. To view the current board meeting agenda, select a link below:

-
- - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Committee - Download - -
- Board Agenda - Download - -
- Leases and Agreement Committee Agenda - Download - -
- Finance and Personnel Committee Agenda - Download - -
-
- - - - - - -
-
-
-
- -
-
- - - - - \ No newline at end of file diff --git a/tests/files/il_port_district_minutes.html b/tests/files/il_port_district_minutes.html deleted file mode 100644 index 0a5c31a0d..000000000 --- a/tests/files/il_port_district_minutes.html +++ /dev/null @@ -1,1106 +0,0 @@ - - - - - - -Board Meeting Minutes | IIPD - - - - - - - - - - - - - - -

Skip to content »

- - - -
- IIPD -
- -
-
-
-
- -
-
- -

- Board Meeting Minutes

- - - - - -
-
- - -
-
- - - - -
-

Board meeting minutes will be posted after final approval.

-
- - - -
-
-
-
- - -
-
- - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- Date - Download - -
- September 19, 2019 - Download - -
- August 16, 2019 - Download - -
- July 19, 2019 - Download - -
- June 21, 2019 - Download - -
- May 17, 2019 - Download - -
- April 18, 2019 - Download - -
- March 19, 2019 - Download - -
- March 1, 2019 Special Board Meeting - Download - -
- February 15, 2019 - Download - -
- January 18, 2019 - Download - -
- December 21, 2018 - Download - -
- September 21, 2018 - Download - -
- November 29, 2018 Special Board Meeting - Download - -
- November 16, 2018 - Download - -
- August 21, 2018 - Download - -
- October 26, 2018 - Download - -
- July 20, 2018 - Download - -
- June 15, 2018 - Download - -
- May 18, 2018 - Download - -
- April 27, 2018 - Download - -
- March 16, 2018 - Download - -
- February 16, 2018 - Download - -
- January 19, 2018 - Download - -
- December 12, 2017 - Download - -
- November 17, 2017 - Download - -
- October 20, 2017 - Download - -
- September 15, 2017 - Download - -
- August 18, 2017 - Download - -
- July 21, 2017 - Download - -
- June 15, 2017 - Download - -
- May 19, 2017 - Download - -
- April 21, 2017 - Download - -
- March 17, 2017 - Download - -
- February 17, 2017 - Download - -
- January 20, 2017 - Download - -
- December 16, 2016 - Download - -
- November 16, 2016 - Download - -
- October 16, 2016 - Download - -
- September 16, 2016 - Download - -
- August 19, 2016 - Download - -
- July 15, 2016 - Download - -
- June 17, 2016 - Download - -
- May 20, 2016 - Download - -
- April 22, 2016 - Download - -
- March 18, 2016 - Download - -
- February 18, 2016 - Download - -
- January 15, 2016 - Download - -
- December 18, 2015 - Download - -
- November 20, 2015 - Download - -
- October 16, 2015 - Download - -
- September 18, 2015 - Download - -
- July 17, 2015 - Download - -
- June 19, 2015 - Download - -
- May 15, 2015 - Download - -
- April 17, 2015 - Download - -
- March 31, 2015 - Download - -
- January 16, 2015 - Download - -
- December 19, 2014 - Download - -
- October 17, 2014 - Download - -
- October 1, 2014 - Download - -
- September 19, 2014 - Download - -
- August 22, 2014 - Download - -
- July 18, 2014 - Download - -
- June 13, 2014 - Download - -
- May 16, 2014 - Download - -
- April 11, 2014 - Download - -
- March 21, 2014 - Download - -
- February 21, 2014 - Download - -
- January 17, 2014 - Download - -
- December 13, 2013 - Download - -
- November 22, 2013 - Download - -
- October 18, 2013 - Download - -
- September 20, 2013 - Download - -
- August 16, 2013 - Download - -
- June 14, 2013 - Download - -
- May 17, 2013 - Download - -
- April 19, 2013 - Download - -
- March 22, 2013 - Download - -
- February 15, 2013 - Download - -
- January 18, 2013 - Download - -
- December 14, 2012 - Download - -
- November 16, 2012 - Download - -
- October 19, 2012 - Download - -
- September 21, 2012 - Download - -
- August 17, 2012 - Download - -
- July 20, 2012 - Download - -
- June 15, 2012 - Download - -
- May 18, 2012 - Download - -
- April 20, 2012 - Download - -
- March 16, 2012 - Download - -
- February 17, 2012 - Download - -
- January 20, 2012 - Download - -
-
- - - - - - -
-
-
-
- -
-
- - - - - \ No newline at end of file diff --git a/tests/files/il_port_district_schedules.html b/tests/files/il_port_district_schedules.html deleted file mode 100644 index b3ed91d6d..000000000 --- a/tests/files/il_port_district_schedules.html +++ /dev/null @@ -1,240 +0,0 @@ - - - - - - -Schedules | IIPD - - - - - - - - - - - - - - -

Skip to content »

- - - -
- IIPD -
- -
-
-
-
- -
-
- -

- Schedules

- - - - - -
-
- - -
-
- - - - -
-

2019 Meeting Schedule

- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
Committee Meetings:
-
-
Board Meetings:
-
-
January 18
-
-
January 18
-
-
February 15
-
-
February 15
-
**March 1March 1 **
-
***March 19
-
-
***March 19
-
-
*April 18
-
-
*April 18
-
-
May 17
-
-
May 17
-
-
June 21
-
-
June 21
-
-
July 19
-
-
July 19
-
-
August 16
-
-
August 16
-
-
September 20
-
-
September 20
-
-
*October 18 (October 24th NEW DATE)
-
-
*October 18 (October 24th NEW DATE)
-
-
November 15
-
-
November 15
-
-
December 20
-
-
December 20
-

**Special Board Meeting

-

*Date subject to change

-

***Start time 8:30am CST

-

 All meetings will begin promptly at 9:00am at the
-Illinois International Port District 

-

located at 
-3600 E. 95th St.
-Chicago, IL 60617

-
- - - -
-
-
-
-
- -
-
- - - - - \ No newline at end of file diff --git a/tests/test_il_port_district.py b/tests/test_il_port_district.py deleted file mode 100644 index ea001f182..000000000 --- a/tests/test_il_port_district.py +++ /dev/null @@ -1,159 +0,0 @@ -from datetime import datetime -from os.path import dirname, join - -import pytest -from city_scrapers_core.constants import BOARD, COMMITTEE, PASSED -from city_scrapers_core.utils import file_response -from freezegun import freeze_time - -from city_scrapers.spiders.il_port_district import IlPortDistrictSpider - -agendas_response = file_response( - join(dirname(__file__), "files", "il_port_district_agendas.html"), - url="https://www.iipd.com/calendar/agendas", -) - -minutes_response = file_response( - join(dirname(__file__), "files", "il_port_district_minutes.html"), - url="https://www.iipd.com/about/board-meeting-minutes", -) - -schedules_response = file_response( - join(dirname(__file__), "files", "il_port_district_schedules.html"), - url="https://www.iipd.com/calendar/schedules", -) - -spider = IlPortDistrictSpider() - -freezer = freeze_time("2019-11-22") -freezer.start() - -spider.parse_agendas(agendas_response) -spider.parse_minutes(minutes_response) - -parsed_items = [item for item in spider.parse_schedules(schedules_response)] - -freezer.stop() - - -def test_title(): - assert parsed_items[0]["title"] == "Special Committee Meeting" - assert parsed_items[1]["title"] == "Special Board Meeting" - assert parsed_items[3]["title"] == "Board Meeting" - assert parsed_items[7]["title"] == "Board Meeting" - assert parsed_items[22]["title"] == "Committee Meeting" - - -def test_start(): - assert parsed_items[0]["start"] == datetime(2019, 3, 1, 9, 0) - assert parsed_items[1]["start"] == datetime(2019, 3, 1, 9, 0) - assert parsed_items[3]["start"] == datetime(2019, 1, 18, 9, 0) - assert parsed_items[7]["start"] == datetime(2019, 3, 19, 8, 30) - assert parsed_items[22]["start"] == datetime(2019, 11, 15, 9, 0) - - -def test_end(): - assert parsed_items[0]["end"] is None - assert parsed_items[1]["end"] is None - assert parsed_items[3]["end"] is None - assert parsed_items[7]["end"] is None - assert parsed_items[22]["end"] is None - - -def test_id(): - assert ( - parsed_items[0]["id"] - == "il_port_district/201903010900/x/special_committee_meeting" - ) - assert ( - parsed_items[1]["id"] == "il_port_district/201903010900/x/special_board_meeting" - ) - assert parsed_items[3]["id"] == "il_port_district/201901180900/x/board_meeting" - assert parsed_items[7]["id"] == "il_port_district/201903190830/x/board_meeting" - assert parsed_items[22]["id"] == "il_port_district/201911150900/x/committee_meeting" - - -def test_status(): - assert parsed_items[0]["status"] == PASSED - assert parsed_items[1]["status"] == PASSED - assert parsed_items[3]["status"] == PASSED - assert parsed_items[7]["status"] == PASSED - assert parsed_items[22]["status"] == PASSED - - -def test_location(): - assert parsed_items[0]["location"] == { - "address": "3600 E. 95th St. Chicago, IL 60617", - "name": "Illinois International Port District ", - } - - assert parsed_items[1]["location"] == { - "address": "3600 E. 95th St. Chicago, IL 60617", - "name": "Illinois International Port District ", - } - - assert parsed_items[3]["location"] == { - "address": "3600 E. 95th St. Chicago, IL 60617", - "name": "Illinois International Port District ", - } - - assert parsed_items[7]["location"] == { - "address": "3600 E. 95th St. Chicago, IL 60617", - "name": "Illinois International Port District ", - } - - assert parsed_items[22]["location"] == { - "address": "3600 E. 95th St. Chicago, IL 60617", - "name": "Illinois International Port District ", - } - - -def test_source(): - assert parsed_items[0]["source"] == "https://www.iipd.com/calendar/schedules" - assert parsed_items[1]["source"] == "https://www.iipd.com/calendar/schedules" - assert parsed_items[3]["source"] == "https://www.iipd.com/calendar/schedules" - assert parsed_items[7]["source"] == "https://www.iipd.com/calendar/schedules" - assert parsed_items[22]["source"] == "https://www.iipd.com/calendar/schedules" - - -def test_links(): - # Spider returns https links, but test file sees them as http - assert parsed_items[0]["links"] == [] - - assert parsed_items[1]["links"][0] == { - "href": "http://www.iipd.com/sites/default/files/documents/Bd%20Meeting%20Minutes%20_%20Special%20Bd%20Meeting_3-1-19.pdf", # noqa - "title": "Board Meeting Minutes", - } - - assert parsed_items[3]["links"][0] == { - "href": "http://www.iipd.com/sites/default/files/documents/Bd%20Meeting%20Minutes%201-18-19.pdf", # noqa - "title": "Board Meeting Minutes", - } - - assert parsed_items[7]["links"][0] == { - "href": "http://www.iipd.com/sites/default/files/documents/Bd%20Meeting%20Minutes%203-19-19.pdf", # noqa - "title": "Board Meeting Minutes", - } - - assert parsed_items[22]["links"][0] == { - "href": "http://www.iipd.com/sites/default/files/documents/L%26A%20Agenda%20November%202019.pdf", # noqa - "title": "Leases and Agreement Committee Agenda November 2019", - } - - assert parsed_items[22]["links"][1] == { - "href": "http://www.iipd.com/sites/default/files/documents/F%26P%20Agenda%20November%202019.pdf", # noqa - "title": "Finance and Personnel Committee Agenda November 2019", - } - - -def test_classification(): - assert parsed_items[0]["classification"] == COMMITTEE - assert parsed_items[1]["classification"] == BOARD - assert parsed_items[3]["classification"] == BOARD - assert parsed_items[7]["classification"] == BOARD - assert parsed_items[22]["classification"] == COMMITTEE - - -@pytest.mark.parametrize("item", parsed_items) -def test_all_day(item): - assert item["all_day"] is False