From 57214525b81c66ad2c0daae79488702f48984585 Mon Sep 17 00:00:00 2001 From: Devin Matte Date: Mon, 29 Jan 2024 20:53:21 -0500 Subject: [PATCH] Shows gobble data in the dashboard (#943) * Shows gobble data in the dashboard Only shows gobble data when manually navigating to a bus url with a date past the current maximum * Fixing rapid parsing * Fixing format for headways * Updating based on feedback --- common/components/notices/BusDataNotice.tsx | 6 +- .../components/notices/GobbleDataNotice.tsx | 61 ++++++++++++ common/components/notices/SameDayNotice.tsx | 2 +- common/components/notices/TerminusNotice.tsx | 2 +- common/components/widgets/WidgetPage.tsx | 2 - common/constants/dates.ts | 2 +- modules/ridership/RidershipDetails.tsx | 3 +- modules/ridership/RidershipWidget.tsx | 3 +- modules/tripexplorer/TripExplorer.tsx | 10 +- server/chalicelib/s3.py | 42 ++++++-- server/chalicelib/s3_historical.py | 8 +- server/rapid/process_events.py | 1 - server/rapid/setup_rapid_input.sh | 2 +- server/scripts/generate_line_files.py | 98 +++++++++++++------ styles/dashboard.css | 5 + 15 files changed, 191 insertions(+), 56 deletions(-) create mode 100644 common/components/notices/GobbleDataNotice.tsx diff --git a/common/components/notices/BusDataNotice.tsx b/common/components/notices/BusDataNotice.tsx index 384b74fa7..a01184f61 100644 --- a/common/components/notices/BusDataNotice.tsx +++ b/common/components/notices/BusDataNotice.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; -import { faMagnifyingGlassChart } from '@fortawesome/free-solid-svg-icons'; +import { faLocationCrosshairs } from '@fortawesome/free-solid-svg-icons'; import { useDelimitatedRoute } from '../../utils/router'; export const BusDataNotice: React.FC = () => { @@ -9,8 +9,8 @@ export const BusDataNotice: React.FC = () => { if (line === 'line-bus' || linePath === 'bus') { return (
- -
+ +

Due to data collection issues, bus data is not guaranteed to be complete for any stop or date. diff --git a/common/components/notices/GobbleDataNotice.tsx b/common/components/notices/GobbleDataNotice.tsx new file mode 100644 index 000000000..374cc5a2f --- /dev/null +++ b/common/components/notices/GobbleDataNotice.tsx @@ -0,0 +1,61 @@ +import React from 'react'; +import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; +import dayjs from 'dayjs'; +import { faChartSimple } from '@fortawesome/free-solid-svg-icons'; +import Link from 'next/link'; +import classNames from 'classnames'; +import { useDelimitatedRoute } from '../../utils/router'; +import { BUS_MAX_DAY } from '../../constants/dates'; +import { lineColorTextHover } from '../../styles/general'; + +export const GobbleDataNotice: React.FC = () => { + const { + line, + linePath, + query: { date, startDate, endDate }, + } = useDelimitatedRoute(); + + const isStartDateAfterBusMaxDay = + (startDate !== undefined && dayjs(startDate).isAfter(BUS_MAX_DAY)) || + (date !== undefined && dayjs(date).isAfter(BUS_MAX_DAY)); + const isEndDateAfterBusMaxDay = endDate !== undefined && dayjs(endDate).isAfter(BUS_MAX_DAY); + + if ( + (line === 'line-bus' || linePath === 'bus') && + (isStartDateAfterBusMaxDay || isEndDateAfterBusMaxDay) + ) { + return ( +

+ +
+

+ Data shown here is collected by TransitMatters using the{' '} + + MBTA's streaming API + + . Unlike other data sources we show, this data is not cleaned or filtered in any way + before display. Inaccuracies may be present.

+

+ Official MBTA data will be shown when available. Technical details of our data + collection can be found{' '} + + here + +

+
+
+ ); + } + return null; +}; diff --git a/common/components/notices/SameDayNotice.tsx b/common/components/notices/SameDayNotice.tsx index 09c2fc8c6..076f09147 100644 --- a/common/components/notices/SameDayNotice.tsx +++ b/common/components/notices/SameDayNotice.tsx @@ -14,7 +14,7 @@ export const SameDayNotice: React.FC = () => { return (
-
+

Due to data not being cleaned yet, today's data may not be fully accurate and may look messy. diff --git a/common/components/notices/TerminusNotice.tsx b/common/components/notices/TerminusNotice.tsx index 009ca6e74..98f6e00e8 100644 --- a/common/components/notices/TerminusNotice.tsx +++ b/common/components/notices/TerminusNotice.tsx @@ -14,7 +14,7 @@ export const TerminusNotice: React.FC = ({ toStation, fromS return (

-
+

Due to data collection issues at terminus stations, data is not guaranteed to be complete. diff --git a/common/components/widgets/WidgetPage.tsx b/common/components/widgets/WidgetPage.tsx index 4456f4cb2..8e45da6e4 100644 --- a/common/components/widgets/WidgetPage.tsx +++ b/common/components/widgets/WidgetPage.tsx @@ -1,6 +1,5 @@ import classNames from 'classnames'; import React from 'react'; -import { BusDataNotice } from '../notices/BusDataNotice'; interface WidgetPageProps { children?: React.ReactNode; @@ -10,7 +9,6 @@ export const WidgetPage: React.FC = ({ children }) => { return (

{children} -
); }; diff --git a/common/constants/dates.ts b/common/constants/dates.ts index fc7f1062d..279cd28b3 100644 --- a/common/constants/dates.ts +++ b/common/constants/dates.ts @@ -36,7 +36,7 @@ const OVERVIEW_TRAIN_MIN_DATE = '2016-02-01'; const TRAIN_MIN_DATE = '2016-01-15'; const BUS_MIN_DATE = '2018-08-01'; export const BUS_MAX_DATE = '2023-12-31'; -const BUS_MAX_DAY = dayjs(BUS_MAX_DATE); +export const BUS_MAX_DAY = dayjs(BUS_MAX_DATE); export const BUS_MAX_DATE_MINUS_ONE_WEEK = dayjs(BUS_MAX_DATE) .subtract(7, 'days') .format(DATE_FORMAT); diff --git a/modules/ridership/RidershipDetails.tsx b/modules/ridership/RidershipDetails.tsx index e9abe39f3..3fbe3bd2f 100644 --- a/modules/ridership/RidershipDetails.tsx +++ b/modules/ridership/RidershipDetails.tsx @@ -18,7 +18,6 @@ export function RidershipDetails() { } = useDelimitatedRoute(); const config = SPEED_RANGE_PARAM_MAP.week; const lineId = getRidershipLineId(line, busRoute); - const lineOrRoute = busRoute ? `line-${busRoute.replaceAll('/', '')}` : line; const enabled = Boolean(startDate && endDate && lineId); const ridership = useRidershipData( @@ -29,7 +28,7 @@ export function RidershipDetails() { }, enabled ); - const ridershipDataReady = !ridership.isError && startDate && endDate && lineOrRoute && line; + const ridershipDataReady = !ridership.isError && startDate && endDate && line; return ( diff --git a/modules/ridership/RidershipWidget.tsx b/modules/ridership/RidershipWidget.tsx index 1243fb590..1a5cce7ed 100644 --- a/modules/ridership/RidershipWidget.tsx +++ b/modules/ridership/RidershipWidget.tsx @@ -16,13 +16,12 @@ export const RidershipWidget: React.FC = () => { const endDate = TODAY_STRING; const config = getSpeedGraphConfig(dayjs(startDate), dayjs(endDate)); const lineId = getRidershipLineId(line, query.busRoute); - const lineOrRoute = query.busRoute ? 
`line-${query.busRoute.replaceAll('/', '')}` : line; const ridership = useRidershipData({ line_id: lineId, start_date: startDate, end_date: endDate, }); - const serviceReady = !ridership.isError && lineId && line && lineOrRoute; + const serviceReady = !ridership.isError && lineId && line; return ( diff --git a/modules/tripexplorer/TripExplorer.tsx b/modules/tripexplorer/TripExplorer.tsx index b87ec16c0..bf3546e1a 100644 --- a/modules/tripexplorer/TripExplorer.tsx +++ b/modules/tripexplorer/TripExplorer.tsx @@ -9,6 +9,8 @@ import { ChartPageDiv } from '../../common/components/charts/ChartPageDiv'; import { Layout } from '../../common/layouts/layoutTypes'; import { useDelimitatedRoute } from '../../common/utils/router'; import { getParentStationForStopId } from '../../common/utils/stations'; +import { BusDataNotice } from '../../common/components/notices/BusDataNotice'; +import { GobbleDataNotice } from '../../common/components/notices/GobbleDataNotice'; import { useAlertStore } from './AlertStore'; import { TripGraphs } from './TripGraphs'; @@ -37,8 +39,12 @@ export const TripExplorer = () => { {alertsForModal?.length ? : null} - - +
+ + + + +
); diff --git a/server/chalicelib/s3.py b/server/chalicelib/s3.py index 042ce13a4..c529fa88e 100644 --- a/server/chalicelib/s3.py +++ b/server/chalicelib/s3.py @@ -28,16 +28,27 @@ def upload(key, bytes, compress=True): s3.put_object(Bucket=BUCKET, Key=key, Body=bytes) -def is_bus(stop_id): +def is_bus(stop_id: str): return ("-0-" in stop_id) or ("-1-" in stop_id) -def download_one_event_file(date, stop_id): +def get_live_folder(stop_id: str): + if is_bus(stop_id): + return "daily-bus-data" + else: + return "daily-rapid-data" + + +def download_one_event_file(date, stop_id: str, use_live_data=False): """As advertised: single event file from s3""" - year, month = date.year, date.month + year, month, day = date.year, date.month, date.day - folder = "monthly-bus-data" if is_bus(stop_id) else "monthly-data" - key = f"Events/{folder}/{stop_id}/Year={year}/Month={month}/events.csv.gz" + if use_live_data: + folder = get_live_folder(stop_id) + key = f"Events-live/{folder}/{stop_id}/Year={year}/Month={month}/Day={day}/events.csv.gz" + else: + folder = "monthly-bus-data" if is_bus(stop_id) else "monthly-data" + key = f"Events/{folder}/{stop_id}/Year={year}/Month={month}/events.csv.gz" # Download events from S3 try: @@ -47,6 +58,8 @@ def download_one_event_file(date, stop_id): if ex.response["Error"]["Code"] == "NoSuchKey": # raise Exception(f"Data not available on S3 for key {key} ") from None print(f"WARNING: No data available on S3 for key: {key}") + if not use_live_data and is_bus(stop_id): + return download_one_event_file(date, stop_id, use_live_data=True) return [] else: raise @@ -67,8 +80,23 @@ def parallel_download_events(datestop): return download_one_event_file(date, stop) -def download_events(sdate, edate, stops): - datestops = itertools.product(parallel.month_range(sdate, edate), stops) +def download_events(sdate, edate, stops: list): + # This used to be month_range but updated to date_range to support live ranges + # If something breaks, this may be why + datestops 
= itertools.product(parallel.date_range(sdate, edate), stops) result = parallel_download_events(datestops) result = filter(lambda row: sdate.strftime("%Y-%m-%d") <= row["service_date"] <= edate.strftime("%Y-%m-%d"), result) return sorted(result, key=lambda row: row["event_time"]) + + +def get_all_s3_objects(s3, **base_kwargs): + continuation_token = None + while True: + list_kwargs = dict(MaxKeys=1000, **base_kwargs) + if continuation_token: + list_kwargs["ContinuationToken"] = continuation_token + response = s3.list_objects_v2(**list_kwargs) + yield from response.get("Contents", []) + if not response.get("IsTruncated"): # At the end of the list? + break + continuation_token = response.get("NextContinuationToken") diff --git a/server/chalicelib/s3_historical.py b/server/chalicelib/s3_historical.py index df64d4374..f0346f662 100644 --- a/server/chalicelib/s3_historical.py +++ b/server/chalicelib/s3_historical.py @@ -32,7 +32,7 @@ def unique_everseen(iterable, key=None): yield element -def dwells(stop_ids, sdate, edate): +def dwells(stop_ids: list, sdate, edate): rows_by_time = s3.download_events(sdate, edate, stop_ids) dwells = [] @@ -59,7 +59,7 @@ def dwells(stop_ids, sdate, edate): return dwells -def headways(stop_ids, sdate, edate): +def headways(stop_ids: list, sdate, edate): rows_by_time = s3.download_events(sdate, edate, stop_ids) only_departures = filter(lambda row: row["event_type"] in EVENT_DEPARTURE, rows_by_time) @@ -88,7 +88,7 @@ def headways(stop_ids, sdate, edate): { "route_id": this["route_id"], "direction": this["direction_id"], - "current_dep_dt": this["event_time"], + "current_dep_dt": date_utils.return_formatted_date(this_dt), "headway_time_sec": headway_time_sec, "benchmark_headway_time_sec": benchmark_headway, } @@ -97,7 +97,7 @@ def headways(stop_ids, sdate, edate): return headways -def travel_times(stops_a, stops_b, sdate, edate): +def travel_times(stops_a: list, stops_b: list, sdate, edate): rows_by_time_a = s3.download_events(sdate, edate, 
stops_a) rows_by_time_b = s3.download_events(sdate, edate, stops_b) diff --git a/server/rapid/process_events.py b/server/rapid/process_events.py index 6c96d0ef8..0370ab622 100644 --- a/server/rapid/process_events.py +++ b/server/rapid/process_events.py @@ -21,7 +21,6 @@ def process_events(input_csv, outdir, nozip=False): input_csv, usecols=columns, parse_dates=["service_date"], - infer_datetime_format=True, dtype={ "route_id": "str", "trip_id": "str", diff --git a/server/rapid/setup_rapid_input.sh b/server/rapid/setup_rapid_input.sh index 79a4af751..90994d0e4 100755 --- a/server/rapid/setup_rapid_input.sh +++ b/server/rapid/setup_rapid_input.sh @@ -14,7 +14,7 @@ wget -N -O data/input/2023.zip https://www.arcgis.com/sharing/rest/content/items cd data/input for i in `seq 2017 2023`; do - unzip -d $i $i.zip + unzip -o -d $i $i.zip done # The following years only have single csv files diff --git a/server/scripts/generate_line_files.py b/server/scripts/generate_line_files.py index ee8bf5669..52f0d5f32 100644 --- a/server/scripts/generate_line_files.py +++ b/server/scripts/generate_line_files.py @@ -4,6 +4,15 @@ import boto3 import botocore +from chalicelib.s3 import get_all_s3_objects + +""" +To run, run as + +poetry shell +python -m scripts.generate_line_files +""" + MBTA_V3_API_KEY = os.environ.get("MBTA_V3_API_KEY", "") BUCKET = "tm-mbta-performance" @@ -20,52 +29,57 @@ "CR-Needham", "CR-Newburyport", "CR-Providence", - "CR-Foxboro", ] def get_line_stops(): s3 = boto3.client("s3", config=botocore.client.Config(max_pool_connections=15)) - objects = s3.list_objects_v2(Bucket=BUCKET, Prefix="Events-live/daily-cr-data/{}".format(LINE_KEY)) + objects = [] + + for file in get_all_s3_objects(s3, Bucket=BUCKET, Prefix="Events-live/daily-cr-data/{}".format(LINE_KEY)): + objects.append(file) + + stop_names = set() - stop_ids = set() + for obj in objects: + stop_names.add(obj["Key"].split("/")[2]) - for obj in objects["Contents"]: - stop_ids.add(parse_s3_cr_uri(obj)[2]) + 
stop_ids = [] + + stop_prefix_len = len(LINE_KEY) + 3 + for stop in stop_names: + stop_ids.append({"id": stop[stop_prefix_len:], "name": stop}) parent_children_map = {} for stop in stop_ids: + _, direction, stop_id = parse_stop_name(stop["name"]) + r_f = requests.get( - "https://api-v3.mbta.com/stops/{}?include=parent_station&api_key={}".format(stop["id"], MBTA_V3_API_KEY) + "https://api-v3.mbta.com/stops/{}?include=parent_station&api_key={}".format(stop_id, MBTA_V3_API_KEY) ) stop_details = r_f.json() parent_id = stop_details["data"]["relationships"]["parent_station"]["data"]["id"] if parent_id not in parent_children_map: - if stop["direction"] == "0": - parent_children_map[parent_id] = {"0": [stop["stop_id"]], "1": []} + if direction == "0": + parent_children_map[parent_id] = {"0": [stop["name"]], "1": []} else: - parent_children_map[parent_id] = {"0": [], "1": [stop["stop_id"]]} + parent_children_map[parent_id] = {"0": [], "1": [stop["name"]]} else: - parent_children_map[parent_id][stop["direction"]].append(stop["stop_id"]) + parent_children_map[parent_id][direction].append(stop["name"]) return parent_children_map -def parse_s3_cr_uri(uri: str): +def parse_stop_name(stop_name: str): """ - Parse a CR s3 URI beginning with Events-live + Parse a CR stop id into its components """ - _, _, stop_name, year, month, day, _ = uri["Key"].split("/") line, direction, stop_id = stop_name.split("_") - return {"line": line, "direction": direction, "stop_id": stop_id, "year": year, "month": month, "day": day} - - -def cr_stop_info_to_s3_prefix(line, direction, stop_id): - return "{}_{}_{}".format(line, direction, stop_id) + return line, direction, stop_id for LINE_KEY in ROUTES_CR: @@ -74,16 +88,42 @@ def cr_stop_info_to_s3_prefix(line, direction, stop_id): stop_layout = get_line_stops() - stops_formatted = [ - { - "stop_name": stop["attributes"]["name"], - "station": stop["id"], - "branches": None, - "order": index + 1, - "stops": stop_layout[stop["id"]], - } - for index, stop 
in enumerate(stops["data"]) - ] + stops_formatted = [] + + for index, stop in enumerate(stops["data"]): + try: + stops_formatted.append( + { + "stop_name": stop["attributes"]["name"], + "station": stop["id"], + "branches": None, + "order": index + 1, + "stops": stop_layout[stop["id"]], + } + ) + except KeyError: + c_f = requests.get( + "https://api-v3.mbta.com/stops/{}?include=child_stops&api_key={}".format(stop["id"], MBTA_V3_API_KEY) + ) + stop_details = c_f.json() + + child_stops = [ + child_stop["id"] for child_stop in stop_details["data"]["relationships"]["child_stops"]["data"] + ] + stops_map = { + "0": [f"{LINE_KEY}_0_{stop}" for stop in child_stops], + "1": [f"{LINE_KEY}_1_{stop}" for stop in child_stops], + } + + stops_formatted.append( + { + "stop_name": stop["attributes"]["name"], + "station": stop["id"], + "branches": None, + "order": index + 1, + "stops": stops_map, + } + ) output = { LINE_KEY: { @@ -94,5 +134,5 @@ def cr_stop_info_to_s3_prefix(line, direction, stop_id): } out_json = json.dumps(output, indent=2) - with open("../common/constants/cr_constants/{}.json".format(LINE_KEY.lower()), "w") as f: + with open("../common/constants/cr_constants/{}.json".format(LINE_KEY.lower()), "w+") as f: f.write(out_json) diff --git a/styles/dashboard.css b/styles/dashboard.css index 1280f2e34..7aefb6d11 100644 --- a/styles/dashboard.css +++ b/styles/dashboard.css @@ -158,4 +158,9 @@ .selected-date-line-bus { background: #FFE395 !important; border-color: #FFE395 !important; +} + +.selected-date-line-commuter-rail { + background: #a95896 !important; + border-color: #a95896 !important; } \ No newline at end of file