From 1bccdd97d18b66711acdfc4b21623123b921a1ca Mon Sep 17 00:00:00 2001 From: showerst Date: Sat, 23 Nov 2024 20:14:16 -0500 Subject: [PATCH] TN: Events: fixes for some TZ issues (#5096) --- scrapers/tn/events.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/scrapers/tn/events.py b/scrapers/tn/events.py index de424572b7..1c9c7e5df9 100644 --- a/scrapers/tn/events.py +++ b/scrapers/tn/events.py @@ -5,6 +5,7 @@ from utils.events import match_coordinates from spatula import HtmlPage, URL, XPath, SelectorError, PdfPage +import dateutil import pytz import re @@ -54,6 +55,7 @@ def process_page(self): class TNEventScraper(Scraper, LXMLMixin): _tz = pytz.timezone("US/Central") _utc = pytz.timezone("UTC") + _tzmapping = {"CST": "US/Central"} def scrape(self, chamber=None): if chamber: @@ -112,8 +114,6 @@ def scrape_chamber(self, chamber=None): location = f"{location}, 600 Dr. Martin L King, Jr. Blvd, Nashville, TN 37243" description = metainf["type"].text_content() - dtfmt = "%A, %B %d, %Y %I:%M %p" - dtfmt_no_time = "%A, %B %d, %Y" # skipping cancelled here instead of setting a status, because # they clear the time on canceled events so we can't look them up if time == "Cancelled": @@ -132,12 +132,11 @@ def scrape_chamber(self, chamber=None): continue datetime_string = datetime_string.strip() - - try: - when = dt.datetime.strptime(datetime_string, dtfmt) - except ValueError: - when = dt.datetime.strptime(datetime_string, dtfmt_no_time) - when = self._utc.localize(when) + when = dateutil.parser.parse( + datetime_string, tzinfos=self._tzmapping + ) + if when.tzinfo is None: + when = self._tz.localize(when) event = Event( name=description,