From a9eeaf4abe66bc659718fd454d2956f0c10a572d Mon Sep 17 00:00:00 2001 From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com> Date: Tue, 7 May 2024 15:56:37 -0500 Subject: [PATCH] Fix spider: chi_low_income_housing_trust_fund --- .../chi_low_income_housing_trust_fund.py | 199 ++++----- .../chi_low_income_housing_trust_fund.ics | 387 ++++++++++++++++++ .../test_chi_low_income_housing_trust_fund.py | 77 ++-- 3 files changed, 513 insertions(+), 150 deletions(-) create mode 100644 tests/files/chi_low_income_housing_trust_fund.ics diff --git a/city_scrapers/spiders/chi_low_income_housing_trust_fund.py b/city_scrapers/spiders/chi_low_income_housing_trust_fund.py index 0d6ac9737..68025ee65 100644 --- a/city_scrapers/spiders/chi_low_income_housing_trust_fund.py +++ b/city_scrapers/spiders/chi_low_income_housing_trust_fund.py @@ -1,137 +1,108 @@ import re -from datetime import datetime +from datetime import date, datetime -import scrapy +import pytz from city_scrapers_core.constants import BOARD, COMMITTEE, NOT_CLASSIFIED from city_scrapers_core.items import Meeting from city_scrapers_core.spiders import CityScrapersSpider +from icalendar import Calendar class ChiLowIncomeHousingTrustFundSpider(CityScrapersSpider): name = "chi_low_income_housing_trust_fund" agency = "Chicago Low-Income Housing Trust Fund" timezone = "America/Chicago" - start_urls = ["http://www.clihtf.org/about-us/upcomingevents/"] + start_urls = ["https://clihtf.org/?post_type=tribe_events&ical=1&eventDisplay=list"] def parse(self, response): """ - `parse` should always `yield` Meeting items. - - Change the `_parse_title`, `_parse_start`, etc methods to fit your scraping - needs. - """ - items = self._parse_calendar(response) - for item in items: - # Drop empty links - if "http" not in item["source"]: - continue - - req = scrapy.Request( - item["source"], - callback=self._parse_detail, - dont_filter=True, - ) - req.meta["item"] = item - yield req - - # Only go to the next page once, so if query parameters are set, exit - if "?month" not in response.url: - yield self._parse_next(response) - - def _parse_next(self, response): - """ - Get next page. You must add logic to `next_url` and - return a scrapy request. + Parse the .ics file and handle data irregularities. """ - next_url = response.css(".calendar-next a::attr(href)").extract_first() - return scrapy.Request(next_url, callback=self.parse, dont_filter=True) - - def _parse_calendar(self, response): - """Parse items on the main calendar page""" - items = [] - for item in response.css( - ".day-with-date:not(.no-events), .current-day:not(.no-events)" - ): - title = self._parse_title(item) - if "training" in title.lower(): - continue - description = self._parse_description(item) - items.append( - Meeting( - title=title, - description=description, - classification=self._parse_classification(title), - all_day=False, - links=[], + cleaned_content = self.clean_ics_data(response.text) + try: + cal = Calendar.from_ical(cleaned_content) + except Exception as e: + self.logger.error("Error parsing iCalendar data: %s", e) + self.logger.error( + "Response content: %s", response.text[:500] + ) # Log first 500 chars + raise + + for component in cal.walk(): + # This agency has many 'Administrative Day' events that + # are not actual meetings + if ( + component.name == "VEVENT" + and "Administrative Day" not in component.get("summary") + ): + meeting = Meeting( + title=component.get("summary").strip(), + description=component.get("description", "").strip() or "", + classification=self._parse_classification(component.get("summary")), + start=self._to_naive(component.get("dtstart").dt), + end=self._to_naive(component.get("dtend").dt), + all_day=self._is_all_day( + component.get("dtstart").dt, component.get("dtend").dt + ), time_notes="", - source=self._parse_source(item, response.url), + location=self._parse_location(component), + links=[ + { + "href": component.get("url", "").strip(), + "title": "Event Details", + } + ], + source=response.url, ) - ) - return items - - def _parse_detail(self, response): - """Parse detail page for additional information""" - meeting = response.meta.get("item", {}) - meeting.update(self._parse_start_end_time(response)) - meeting["location"] = self._parse_location(response) - meeting["status"] = self._get_status(meeting) - meeting["id"] = self._get_id(meeting) - return meeting - - def _parse_title(self, item): - """Parse or generate event title""" - return item.css(".event-title::text").extract_first() - - def _parse_description(self, item): - """Parse or generate event description""" - return ( - item.xpath( - './/span[@class="event-content-break"]/following-sibling::text()' - ).extract_first() - or "" + meeting["status"] = self._get_status(meeting) + meeting["id"] = self._get_id(meeting) + yield meeting + + def clean_ics_data(self, ics_content): + """Handles a quirk in the ICS file where VTIMEZONE blocks are formatted + improperly and cause icalendar parsing errors.""" + normalized_content = ics_content.replace("\r\n", "\n") + cleaned_content = re.sub( + r"BEGIN:VTIMEZONE.*?END:VTIMEZONE\n", + "", + normalized_content, + flags=re.DOTALL, ) + return cleaned_content def _parse_classification(self, title): - """Parse or generate classification (e.g. board, committee, etc)""" - if "board" in title.lower(): - return BOARD - if "committe" in title.lower(): + if "committee" in title.lower(): return COMMITTEE + elif "board" in title.lower(): + return BOARD return NOT_CLASSIFIED - def _parse_start_end_time(self, response): - """Parse start and end datetimes""" - time_str = response.css(".cc-panel .cc-block > span::text").extract_first() - time_str = re.sub(r"\s+", " ", time_str) - date_str = re.search(r"(?<=day, ).*(?= fro)", time_str).group().strip() - start_str = re.search(r"(?<=from ).*(?= to)", time_str).group().strip() - end_str = re.search(r"(?<=to ).*(?= \w{3})", time_str).group().strip() - date_obj = datetime.strptime(date_str, "%B %d, %Y").date() - start_time = datetime.strptime(start_str, "%I:%M %p").time() - end_time = datetime.strptime(end_str, "%I:%M %p").time() - return { - "start": datetime.combine(date_obj, start_time), - "end": datetime.combine(date_obj, end_time), - } - - def _parse_location(self, response): - """Parse or generate location""" - addr_sel = response.css( - ".cc-panel .cc-block:nth-child(2) > span:nth-of-type(2)::text" - ) - if not addr_sel: - addr_sel = response.css("#span_event_where_multiline p:first-of-type::text") - addr_lines = addr_sel.extract() - return { - "address": " ".join( - [re.sub(r"\s+", " ", line).strip() for line in addr_lines] - ), - "name": "", - } - - def _parse_source(self, item, response_url): - """Parse or generate source""" - item_link = item.css(".calnk > a::attr(href)").extract_first() - if item_link: - return item_link - return response_url + def _to_naive(self, dt): + """Convert timezone-aware datetime to naive datetime in the local timezone, + or return the date object if it's a date.""" + print("dt: ", dt) + local_timezone = pytz.timezone( + self.timezone + ) # Ensure you are using the spider's timezone + if isinstance(dt, datetime): + if dt.tzinfo is not None: + return dt.astimezone(local_timezone).replace(tzinfo=None) + return dt + elif isinstance(dt, date): + # Convert date to datetime for uniform handling + return datetime.combine(dt, datetime.min.time(), tzinfo=None) + return dt + + def _is_all_day(self, start, end): + """Check if the event is an all-day event.""" + return type(start) is date and (end - start).days == 1 + + def _parse_location(self, component): + """Parse or generate location.""" + location = component.get("location", "") + if not location: + return { + "name": "Chicago Low-Income Housing Trust Fund", + "address": "77 West Washington Street, Suite 719, Chicago, IL 60602", + } + return {"name": location, "address": ""} diff --git a/tests/files/chi_low_income_housing_trust_fund.ics b/tests/files/chi_low_income_housing_trust_fund.ics new file mode 100644 index 000000000..d9104a788 --- /dev/null +++ b/tests/files/chi_low_income_housing_trust_fund.ics @@ -0,0 +1,387 @@ +BEGIN:VCALENDAR +VERSION:2.0 +PRODID:-//CLIHTF - ECPv6.4.0.1//NONSGML v1.0//EN +CALSCALE:GREGORIAN +METHOD:PUBLISH +X-WR-CALNAME:CLIHTF +X-ORIGINAL-URL:https://clihtf.org +X-WR-CALDESC:Events for CLIHTF +REFRESH-INTERVAL;VALUE=DURATION:PT1H +X-Robots-Tag:noindex +X-PUBLISHED-TTL:PT1H +BEGIN:VTIMEZONE +TZID:UTC +BEGIN:STANDARD +TZOFFSETFROM:+0000 +TZOFFSETTO:+0000 +TZNAME:UTC +DTSTART:20240101T000000 +END:STANDARD +TZID:America/Chicago +BEGIN:DAYLIGHT +TZOFFSETFROM:-0600 +TZOFFSETTO:-0500 +TZNAME:CDT +DTSTART:20240310T080000 +END:DAYLIGHT +BEGIN:STANDARD +TZOFFSETFROM:-0500 +TZOFFSETTO:-0600 +TZNAME:CST +DTSTART:20241103T070000 +END:STANDARD +END:VTIMEZONE +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240508 +DTEND;VALUE=DATE:20240509 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9972125171-0046215171-34000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-05-08/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240509T083000 +DTEND;TZID=America/Chicago:20240509T093000 +DTSTAMP:20240507T154440 +CREATED:20240502T153248Z +LAST-MODIFIED:20240502T153248Z +UID:gro.fthilc@0007425171-0043425171-39200001 +SUMMARY:Outreach Meeting +DESCRIPTION: +URL:https://clihtf.org/event/outreach-meeting-4/ +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240509T153000 +DTEND;TZID=America/Chicago:20240509T163000 +DTSTAMP:20240507T154440 +CREATED:20240502T153206Z +LAST-MODIFIED:20240502T153206Z +UID:gro.fthilc@0022725171-0068625171-29200001 +SUMMARY:Finance Meeting +DESCRIPTION: +URL:https://clihtf.org/event/finance-meeting-4/ +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240514T083000 +DTEND;TZID=America/Chicago:20240514T093000 +DTSTAMP:20240507T154440 +CREATED:20240502T153350Z +LAST-MODIFIED:20240502T153350Z +UID:gro.fthilc@0009765171-0045765171-49200001 +SUMMARY:Executive Committee Meeting +DESCRIPTION: +URL:https://clihtf.org/event/executive-committee-meeting/ +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240515 +DTEND;VALUE=DATE:20240516 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9957185171-0021375171-44000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-05-15/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240522 +DTEND;VALUE=DATE:20240523 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9932246171-0006336171-54000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-05-22/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240529 +DTEND;VALUE=DATE:20240530 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9917207171-0080496171-64000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-05-29/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240604T140000 +DTEND;TZID=America/Chicago:20240604T150000 +DTSTAMP:20240507T154440 +CREATED:20240502T153539Z +LAST-MODIFIED:20240502T153539Z +UID:gro.fthilc@0023157171-0069057171-59200001 +SUMMARY:Allocations Meeting +DESCRIPTION: +URL:https://clihtf.org/event/allocations-meeting-5/ +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240605 +DTEND;VALUE=DATE:20240606 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9991367171-0065457171-74000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-06-05/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240606T083000 +DTEND;TZID=America/Chicago:20240606T093000 +DTSTAMP:20240507T154440 +CREATED:20240502T153722Z +LAST-MODIFIED:20240502T153722Z +UID:gro.fthilc@0026667171-0062667171-79200001 +SUMMARY:Outreach Meeting +DESCRIPTION: +URL:https://clihtf.org/event/outreach-meeting-5/ +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240606T153000 +DTEND;TZID=America/Chicago:20240606T163000 +DTSTAMP:20240507T154440 +CREATED:20240502T153634Z +LAST-MODIFIED:20240502T153634Z +UID:gro.fthilc@0041967171-0087867171-69200001 +SUMMARY:Finance Meeting +DESCRIPTION: +URL:https://clihtf.org/event/finance-meeting-5/ +END:VEVENT +BEGIN:VEVENT +DTSTART;TZID=America/Chicago:20240611T083000 +DTEND;TZID=America/Chicago:20240611T093000 +DTSTAMP:20240507T154440 +CREATED:20240502T153815Z +LAST-MODIFIED:20240502T153815Z +UID:gro.fthilc@0028908171-0064908171-89200001 +SUMMARY:Executive Committee Meeting +DESCRIPTION: +URL:https://clihtf.org/event/executive-committee-meeting-2/ +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240612 +DTEND;VALUE=DATE:20240613 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9976328171-0040518171-84000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-06-12/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240619 +DTEND;VALUE=DATE:20240620 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9951488171-0025578171-94000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-06-19/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240626 +DTEND;VALUE=DATE:20240627 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9936449171-0000639171-05000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-06-26/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240703 +DTEND;VALUE=DATE:20240704 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9911500271-0084699171-15000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-07-03/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240710 +DTEND;VALUE=DATE:20240711 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9995560271-0069650271-25000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-07-10/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240717 +DTEND;VALUE=DATE:20240718 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9970621271-0044711271-35000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-07-17/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240724 +DTEND;VALUE=DATE:20240725 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9955681271-0029771271-45000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-07-24/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240731 +DTEND;VALUE=DATE:20240801 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9930742271-0004832271-55000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-07-31/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240807 +DTEND;VALUE=DATE:20240808 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9915703271-0088892271-65000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-08-07/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240814 +DTEND;VALUE=DATE:20240815 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9999763271-0063953271-75000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-08-14/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240821 +DTEND;VALUE=DATE:20240822 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9974824271-0048914271-85000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-08-21/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240828 +DTEND;VALUE=DATE:20240829 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9959884271-0023084271-95000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-08-28/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240904 +DTEND;VALUE=DATE:20240905 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9934945271-0008045271-06000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-09-04/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240911 +DTEND;VALUE=DATE:20240912 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9919906271-0082106271-16000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-09-11/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240918 +DTEND;VALUE=DATE:20240919 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9993076271-0067166271-26000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-09-18/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20240925 +DTEND;VALUE=DATE:20240926 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9978037271-0042227271-36000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-09-25/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20241002 +DTEND;VALUE=DATE:20241003 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9953197271-0027287271-46000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-10-02/ +CATEGORIES:Administrative Day +END:VEVENT +BEGIN:VEVENT +DTSTART;VALUE=DATE:20241009 +DTEND;VALUE=DATE:20241010 +DTSTAMP:20240507T154440 +CREATED:20230731T203806Z +LAST-MODIFIED:20230731T203807Z +UID:gro.fthilc@9938158271-0002348271-56000001 +SUMMARY:Administrative Day +DESCRIPTION:Office Closed +URL:https://clihtf.org/event/administrative-day-2-2/2024-10-09/ +CATEGORIES:Administrative Day +END:VEVENT +END:VCALENDAR \ No newline at end of file diff --git a/tests/test_chi_low_income_housing_trust_fund.py b/tests/test_chi_low_income_housing_trust_fund.py index 6b5a88f35..0d71b06b1 100644 --- a/tests/test_chi_low_income_housing_trust_fund.py +++ b/tests/test_chi_low_income_housing_trust_fund.py @@ -1,8 +1,8 @@ from datetime import datetime from os.path import dirname, join -import pytest -from city_scrapers_core.constants import BOARD, COMMITTEE, PASSED +import pytest # noqa +from city_scrapers_core.constants import NOT_CLASSIFIED, TENTATIVE from city_scrapers_core.utils import file_response from freezegun import freeze_time @@ -10,73 +10,78 @@ ChiLowIncomeHousingTrustFundSpider, ) -freezer = freeze_time("2018-10-31") +# Simulate the current date at the time of testing +freezer = freeze_time("2024-05-07") freezer.start() + +# Initialize the spider spider = ChiLowIncomeHousingTrustFundSpider() +# Simulate a file response with a sample ics file cal_res = file_response( - join(dirname(__file__), "files", "chi_low_income_housing_trust_fund.html") + join(dirname(__file__), "files", "chi_low_income_housing_trust_fund.ics"), + url="https://clihtf.org/?post_type=tribe_events&ical=1&eventDisplay=list", ) -parsed_items = [] -for item in spider._parse_calendar(cal_res): - detail_res = file_response( - join( - dirname(__file__), "files", "chi_low_income_housing_trust_fund_detail.html" - ) - ) - detail_res.meta["item"] = item - parsed_items.append(spider._parse_detail(detail_res)) +parsed_items = [item for item in spider.parse(cal_res)] + freezer.stop() +# Test for event title def test_title(): - assert parsed_items[0]["title"] == "Finance Committee" - assert parsed_items[1]["title"] == "Allocations Committee" - assert parsed_items[2]["title"] == "Board Meeting" + assert parsed_items[0]["title"] == "Outreach Meeting" +# Test for event start datetime def test_start(): - assert parsed_items[0]["start"] == datetime(2018, 10, 4, 10, 0) + assert parsed_items[0]["start"] == datetime(2024, 5, 9, 8, 30) +# Test for event end datetime def test_end(): - assert parsed_items[0]["end"] == datetime(2018, 10, 4, 11, 0) + assert parsed_items[0]["end"] == datetime(2024, 5, 9, 9, 30) +# Test for unique event ID def test_id(): - assert parsed_items[0]["id"] == ( - "chi_low_income_housing_trust_fund/201810041000/x/finance_committee" + assert ( + parsed_items[0]["id"] + == "chi_low_income_housing_trust_fund/202405090830/x/outreach_meeting" ) +# Test for classification of the event def test_classification(): - assert parsed_items[0]["classification"] == COMMITTEE - assert parsed_items[2]["classification"] == BOARD + assert parsed_items[0]["classification"] == NOT_CLASSIFIED +# Test for event status def test_status(): - assert parsed_items[0]["status"] == PASSED + assert parsed_items[0]["status"] == TENTATIVE +# Test for event description def test_description(): - assert parsed_items[0]["description"] == ( - "Meeting of the CLIHTF Finance Committee. To attend, send Name and " - "Planned Attendance Date to info@chicagotrustfund.org. Regular " - "Meeting Location: Chicago City Hall, Rm. 1006c." - ) + assert parsed_items[0]["description"] == "" +# Test for location details def test_location(): assert parsed_items[0]["location"] == { - "address": "121 N. La Salle - Room 1006 Chicago, IL 60602", - "name": "", + "name": "Chicago Low-Income Housing Trust Fund", + "address": "77 West Washington Street, Suite 719, Chicago, IL 60602", } -@pytest.mark.parametrize("item", parsed_items) -def test_links(item): - assert item["links"] == [] +# Test for links associated with the event +def test_links(): + assert parsed_items[0]["links"] == [ + { + "href": "https://clihtf.org/event/outreach-meeting-4/", + "title": "Event Details", + } + ] -@pytest.mark.parametrize("item", parsed_items) -def test_all_day(item): - assert item["all_day"] is False +# Test if the event is marked as all day +def test_all_day(): + assert parsed_items[0]["all_day"] is False