Skip to content

Commit

Permalink
Merge pull request #1370 from cal-itp/ah_gtfs
Browse files Browse the repository at this point in the history
Adding back rail routes
  • Loading branch information
amandaha8 authored Feb 18, 2025
2 parents 27991ba + cb5314b commit 34830ba
Show file tree
Hide file tree
Showing 11 changed files with 30,391 additions and 192 deletions.
779 changes: 717 additions & 62 deletions gtfs_digest/03_report.ipynb

Large diffs are not rendered by default.

26,958 changes: 26,840 additions & 118 deletions gtfs_digest/45_missing_routes2.ipynb

Large diffs are not rendered by default.

2,028 changes: 2,028 additions & 0 deletions gtfs_digest/46_rail_routes.ipynb

Large diffs are not rendered by default.

727 changes: 727 additions & 0 deletions gtfs_digest/47_check_operators.ipynb

Large diffs are not rendered by default.

61 changes: 55 additions & 6 deletions gtfs_funnel/route_typologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,15 @@ def overlay_shapes_to_roads(
buffer_meters: int
) -> gpd.GeoDataFrame:

# AH: removed pipe b/c it erases routes from Amtrak
#common_shape = gtfs_schedule_wrangling.most_common_shape_by_route_direction(
# analysis_date
#).pipe(helpers.remove_shapes_outside_ca)

common_shape = gtfs_schedule_wrangling.most_common_shape_by_route_direction(
analysis_date
).pipe(helpers.remove_shapes_outside_ca)

)
common_shape = common_shape.assign(
route_meters = common_shape.geometry.length,
)
Expand Down Expand Up @@ -352,6 +357,34 @@ def reconcile_route_and_nacto_typologies(

return df2

def add_rail_back(
    categorize_routes_df: pd.DataFrame, overlay_shapes_to_roads_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Re-add rail routes that get dropped when they fall below the
    typology threshold, keeping one row per route-direction-operator.

    categorize_routes_df: df created by categorize_routes_by_name();
        must contain is_rail, route_id, schedule_gtfs_dataset_key.
    overlay_shapes_to_roads_df: df created by overlay_shapes_to_roads();
        must contain route_id, direction_id, schedule_gtfs_dataset_key,
        pct_typology (plus typology detail columns).

    Returns the rail-only rows with primary/secondary typology columns
    (is_nacto_rapid, is_nacto_coverage, etc.) attached.
    """
    # Filter for only rail routes and drop duplicates.
    rail_routes = categorize_routes_df.loc[categorize_routes_df.is_rail == 1][
        ["route_id", "schedule_gtfs_dataset_key"]
    ].drop_duplicates()

    # Merge with the overlay df to retain the details for columns such as
    # typology, freq_category, etc.
    # BUG FIX: original code merged the module-level global `gdf` instead of
    # the overlay_shapes_to_roads_df parameter, which only worked by accident
    # when called from __main__ (where gdf happened to be in scope).
    m1 = pd.merge(overlay_shapes_to_roads_df, rail_routes, how="inner")

    # Retain only one row for each route-direction-operator,
    # keeping the row with the highest pct_typology.
    m1 = m1.sort_values(
        by=["route_id", "direction_id", "schedule_gtfs_dataset_key", "pct_typology"],
        ascending=[True, True, True, False],
    ).drop_duplicates(subset=["route_id", "direction_id", "schedule_gtfs_dataset_key"])

    # Apply primary_secondary_typology() (defined elsewhere in this module),
    # which adds columns like is_nacto_rapid, is_nacto_coverage.
    m1 = primary_secondary_typology(m1)

    return m1

if __name__ == "__main__":

Expand All @@ -363,7 +396,7 @@ def reconcile_route_and_nacto_typologies(

roads = delayed(prep_roads)(GTFS_DATA_DICT)
ROAD_BUFFER_METERS = 20
TYPOLOGY_THRESHOLD = 0.10
TYPOLOGY_THRESHOLD = 0.1

for analysis_date in analysis_date_list:

Expand All @@ -383,17 +416,33 @@ def reconcile_route_and_nacto_typologies(
# Aggregate to route-dir-typology
route_typology_df2 = primary_secondary_typology(route_typology_df)

# Tag if the route is express, rapid, or rail
route_tagged = categorize_routes_by_name(analysis_date)

        # Incorporate back rail routes that disappear if the routes
        # don't meet the minimum set in typology_threshold.
rail_routes_df = add_rail_back(route_tagged, gdf)
all_routes = pd.concat([route_typology_df2, rail_routes_df])


# Merge
df3 = pd.merge(
route_tagged,
route_typology_df2,
all_routes,
on = ["schedule_gtfs_dataset_key", "route_id"],
).pipe(reconcile_route_and_nacto_typologies)

df3.to_parquet(
f"{SCHED_GCS}{EXPORT}_{analysis_date}.parquet")

        # Drop duplicates because some rail routes are found in both
        # route_typology_df2 and rail_routes_df
df3 = (df3.drop_duplicates(
subset = ["schedule_gtfs_dataset_key",
"route_id",
"route_long_name",
"direction_id"])
)
df3.to_parquet(
f"{SCHED_GCS}{EXPORT}_AH_TEST_{analysis_date}.parquet")

time1 = datetime.datetime.now()
print(f"route typologies {analysis_date}: {time1 - time0}")
Expand Down
4 changes: 2 additions & 2 deletions gtfs_funnel/update_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
)


# analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]
analysis_date_list = all_dates
analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]
# analysis_date_list = all_dates
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

COMPILED_CACHED_VIEWS = GTFS_DATA_DICT.gcs_paths.COMPILED_CACHED_VIEWS
Expand Down
12 changes: 12 additions & 0 deletions portfolio/sites/gtfs_digest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ parts:
- organization_name: Mendocino Transit Authority
- organization_name: POINT
- organization_name: Redwood Coast Transit Authority
- organization_name: Yurok Tribe
- caption: District 02 - Redding
params:
district: 02 - Redding
Expand Down Expand Up @@ -94,6 +95,8 @@ parts:
district: 05 - San Luis Obispo
sections:
- organization_name: City of Guadalupe
- organization_name: City of Lompoc
- organization_name: City of Morro Bay
- organization_name: City of San Luis Obispo
- organization_name: City of Santa Cruz
- organization_name: City of Santa Maria
Expand Down Expand Up @@ -154,6 +157,7 @@ parts:
- organization_name: City of Los Angeles
- organization_name: City of Lynwood
- organization_name: City of Maywood
- organization_name: City of Montebello
- organization_name: City of Monterey Park
- organization_name: City of Moorpark
- organization_name: City of Norwalk
Expand Down Expand Up @@ -186,6 +190,7 @@ parts:
district: 08 - San Bernardino
sections:
- organization_name: Basin Transit
- organization_name: Chemehuevi Indian Tribe
- organization_name: City of Banning
- organization_name: City of Beaumont
- organization_name: City of Corona
Expand All @@ -194,6 +199,7 @@ parts:
- organization_name: OmniTrans
- organization_name: Palo Verde Valley Transit Agency
- organization_name: Riverside Transit Agency
- organization_name: SunLine Transit Agency
- organization_name: Victor Valley Transit Authority
- caption: District 09 - Bishop
params:
Expand Down Expand Up @@ -234,7 +240,13 @@ parts:
params:
district: 12 - Irvine
sections:
- organization_name: Anaheim Transportation Network
- organization_name: City of Dana Point
- organization_name: City of Irvine
- organization_name: City of Laguna Beach
- organization_name: City of Mission Viejo
- organization_name: City of San Clemente
- organization_name: City of San Juan Capistrano
- organization_name: Orange County Transportation Authority
- organization_name: University of California, Irvine
readme: ./gtfs_digest/README.md
Expand Down
1 change: 1 addition & 0 deletions rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,4 @@
2025-01-16 15:49:56.361 | INFO | __main__:route_metrics:88 - route aggregation 2023-10-11: 0:00:01.825395
2025-01-16 15:49:58.178 | INFO | __main__:route_metrics:88 - route aggregation 2023-11-15: 0:00:01.812722
2025-01-16 15:50:00.055 | INFO | __main__:route_metrics:88 - route aggregation 2023-12-13: 0:00:01.873527
2025-01-21 16:29:01.096 | INFO | __main__:route_metrics:88 - route aggregation 2024-11-13: 0:00:03.148850
4 changes: 2 additions & 2 deletions rt_scheduled_v_ran/scripts/update_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
oct2024_week = rt_dates.get_week("oct2024", exclude_wed=True)


# analysis_date_list = [rt_dates.DATES["nov2024"]]
analysis_date_list = rt_dates.y2024_dates + rt_dates.y2023_dates
analysis_date_list = [rt_dates.DATES["nov2024"]]
# analysis_date_list = rt_dates.y2024_dates + rt_dates.y2023_dates

GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

Expand Down
4 changes: 4 additions & 0 deletions rt_segment_speeds/logs/avg_speeds.log
Original file line number Diff line number Diff line change
Expand Up @@ -678,3 +678,7 @@
2025-01-16 16:28:31.739 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2023-04-15'] execution time: 0:00:17.603240
2025-01-16 16:28:40.309 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:08.508140
2025-01-16 16:28:49.151 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2023-04-16'] execution time: 0:00:17.350031
2025-01-21 16:31:27.163 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:17.106380
2025-01-21 16:31:41.624 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2024-12-11'] execution time: 0:00:31.567351
2025-01-21 16:32:04.052 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:22.327037
2025-01-21 16:32:17.670 | INFO | __main__:summary_average_speeds:154 - rt_stop_times summary speed averaging for ['2024-11-13'] execution time: 0:00:35.944913
5 changes: 3 additions & 2 deletions rt_segment_speeds/segment_speed_utils/project_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@
rt_dates.oct2023_week, rt_dates.apr2023_week,
]

analysis_date_list = apr2024_week + oct2023_week + apr2023_week


analysis_date_list = apr2024_week + oct2023_week + apr2023_week
analysis_date_list = [rt_dates.DATES["dec2024"]] + [rt_dates.DATES['nov2024']]

PROJECT_CRS = "EPSG:3310"
ROAD_SEGMENT_METERS = 1_000
Expand Down

0 comments on commit 34830ba

Please sign in to comment.