Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hermosa export #1307

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 301 additions & 0 deletions ca_transit_speed_maps/03_hermosa_adhoc_export.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 24,
"id": "46898b5c-b5cc-4096-ab68-8c3e42fab870",
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"import calitp_data_analysis.magics\n",
"# from update_vars_index import ANALYSIS_DATE\n",
"\n",
"import speedmap_utils\n",
"import pandas as pd\n",
"import geopandas as gpd\n",
"import numpy as np\n",
"from siuba import *\n",
"import shared_utils\n",
"catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data')"
]
},
{
"cell_type": "markdown",
"id": "5f100bb7-182c-4d78-a8f8-1b2eabf70650",
"metadata": {},
"source": [
"## Hermosa ad hoc speedmap export: https://github.com/cal-itp/data-analyses/issues/1306"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9f15ed6a-5ab7-4f57-9695-3f762781b74c",
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"## parameters cell\n",
"organization_source_record_ids = ['rec4pgjrmdhCh4z01', 'rec8zhnCPETu6qEiH', 'recPnGkwdpnr8jmHB',\n",
" 'recvzE9NXgGMmqcTH']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "7c53d945-7295-4d86-9110-b6f4f52f9975",
"metadata": {},
"outputs": [],
"source": [
"dates = ['jan2024', 'feb2024', 'mar2024', 'apr2024',\n",
" 'may2024', 'jun2024', 'jul2024', 'aug2024',\n",
" 'sep2024', 'oct2024', 'nov2024']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "5843d0b3-a500-4c2e-8a65-bfe2935ce089",
"metadata": {},
"outputs": [],
"source": [
"dates = [shared_utils.rt_dates.DATES[date] for date in dates]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "1989aba3-8cf6-48c4-aceb-2399eea80de2",
"metadata": {},
"outputs": [],
"source": [
"def read_segments(organization_source_record_ids: list, analysis_date, route_short_names: list = None) -> gpd.GeoDataFrame:\n",
"    '''\n",
"    Read the detailed speedmap segments for one analysis date, filtered to\n",
"    the given organizations and routes, and add a fast/slow speed ratio.\n",
"\n",
"    organization_source_record_ids: values matched against the\n",
"        organization_source_record_id column while reading the parquet.\n",
"    analysis_date: suffix of the parquet file name; also stored on every\n",
"        row in a new date column.\n",
"    route_short_names: values matched against the route_short_name column.\n",
"        Defaults to ['232', '109', '438', '13'].\n",
"\n",
"    Returns the filtered segments with date and fast_slow_ratio columns\n",
"    added and rounded with .round(1).\n",
"    Raises AssertionError if any column other than route_short_name has nulls.\n",
"    '''\n",
"    if route_short_names is None:\n",
"        route_short_names = ['232', '109', '438', '13']\n",
"    path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n",
"    speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', 'in', organization_source_record_ids],\n",
"                                                    ['route_short_name', 'in', route_short_names]]) # aggregated\n",
"    has_unexpected_nan = (speedmap_segs >> select(-_.route_short_name)).isna().any().any()\n",
"    assert not has_unexpected_nan, 'no cols besides route_short_name should be nan'\n",
"    speedmap_segs['date'] = analysis_date\n",
"\n",
"    # TODO move upstream and investigate\n",
"    speedmap_segs['fast_slow_ratio'] = speedmap_segs.p80_mph / speedmap_segs.p20_mph\n",
"    # division by a zero p20_mph yields inf; those rows get a ratio of 3 instead\n",
"    speedmap_segs['fast_slow_ratio'] = speedmap_segs['fast_slow_ratio'].replace(np.inf, 3)\n",
"    speedmap_segs = speedmap_segs.round(1)\n",
"\n",
"    return speedmap_segs"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "c3333da1-b90c-4ed3-8655-cd668ef33ed4",
"metadata": {},
"outputs": [],
"source": [
"gdf = read_segments(organization_source_record_ids, dates[0])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "420b81e4-80db-4385-9961-58007bbdb5b5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-01-17\n",
"2024-02-14\n",
"2024-03-13\n",
"2024-04-17\n",
"2024-05-22\n",
"2024-06-12\n",
"2024-07-17\n",
"2024-08-14\n",
"2024-09-18\n",
"2024-10-16\n",
"2024-11-13\n"
]
}
],
"source": [
"# Read each date into a list and concatenate once, rather than\n",
"# re-copying a growing frame on every iteration.\n",
"lines_by_date = []\n",
"for date in dates:\n",
"    print(date)\n",
"    lines_by_date.append(read_segments(organization_source_record_ids, date))\n",
"# reversed so the last date read comes first in the combined frame\n",
"lines = pd.concat(lines_by_date[::-1]) if lines_by_date else gpd.GeoDataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "359dc68a-903b-455f-b419-920fa506ebed",
"metadata": {},
"outputs": [],
"source": [
"def read_process_segments(organization_source_record_ids: list, analysis_date, route_short_names: list = None) -> gpd.GeoDataFrame:\n",
"    '''\n",
"    Read the detailed speedmap segments for one analysis date, filtered to\n",
"    the given organizations and routes, and run them through\n",
"    speedmap_utils.prepare_segment_gdf.\n",
"\n",
"    organization_source_record_ids: values matched against the\n",
"        organization_source_record_id column while reading the parquet.\n",
"    analysis_date: suffix of the parquet file name; also stored on every\n",
"        row in a new date column before processing.\n",
"    route_short_names: values matched against the route_short_name column.\n",
"        Defaults to ['232', '109', '438', '13'].\n",
"\n",
"    Returns whatever speedmap_utils.prepare_segment_gdf produces for the\n",
"    filtered segments.\n",
"    Raises AssertionError if any column other than route_short_name has nulls.\n",
"    '''\n",
"    if route_short_names is None:\n",
"        route_short_names = ['232', '109', '438', '13']\n",
"    path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n",
"    speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', 'in', organization_source_record_ids],\n",
"                                                    ['route_short_name', 'in', route_short_names]]) # aggregated\n",
"    has_unexpected_nan = (speedmap_segs >> select(-_.route_short_name)).isna().any().any()\n",
"    assert not has_unexpected_nan, 'no cols besides route_short_name should be nan'\n",
"    speedmap_segs['date'] = analysis_date\n",
"    speedmap_segs = speedmap_utils.prepare_segment_gdf(speedmap_segs)\n",
"\n",
"    return speedmap_segs"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "bc4bc624-f5d1-42e0-89a1-858671753de6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-01-17\n",
"2024-02-14\n",
"2024-03-13\n",
"2024-04-17\n",
"2024-05-22\n",
"2024-06-12\n",
"2024-07-17\n",
"2024-08-14\n",
"2024-09-18\n",
"2024-10-16\n",
"2024-11-13\n"
]
}
],
"source": [
"# Read each date into a list and concatenate once, rather than\n",
"# re-copying a growing frame on every iteration.\n",
"polygons_by_date = []\n",
"for date in dates:\n",
"    print(date)\n",
"    polygons_by_date.append(read_process_segments(organization_source_record_ids, date))\n",
"# reversed so the last date read comes first in the combined frame\n",
"polygons = pd.concat(polygons_by_date[::-1]) if polygons_by_date else gpd.GeoDataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "b5786ff5-e33e-4bbd-acc2-b8459f4e1f13",
"metadata": {},
"outputs": [],
"source": [
"polygons.to_file('hermosa_speedmap_polygons.geojson')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "fbd4f2b9-257a-4c60-b9d0-fa51f8fb2982",
"metadata": {},
"outputs": [],
"source": [
"lines.to_file('hermosa_speedmap_lines.geojson')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d26b1dbf-15db-4d28-bac9-f3f3885d717a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 37,
"id": "821bfbee-f2d4-4e06-aec0-aa877d27db4f",
"metadata": {},
"outputs": [],
"source": [
"def write_gz(gdf, path):\n",
"    '''\n",
"    Serialize gdf with its to_json method and write the UTF-8 encoded\n",
"    result to path as a gzip-compressed file.\n",
"    '''\n",
"    payload = gdf.to_json().encode(\"utf-8\")\n",
"    with open(path, \"wb\") as raw_file, gzip.GzipFile(fileobj=raw_file, mode=\"w\") as gz_file:\n",
"        gz_file.write(payload)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "7733fb48-5403-43b7-9ba2-a86992d1f79a",
"metadata": {},
"outputs": [],
"source": [
"import gzip"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "4f1776af-65b8-413d-8d02-5aa66f0671b0",
"metadata": {},
"outputs": [],
"source": [
"write_gz(polygons, 'hermosa_speedmap_polygons.geojson.gz')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "88f8b256-4bce-4cc4-b878-d1fe1c512100",
"metadata": {},
"outputs": [],
"source": [
"write_gz(lines, 'hermosa_speedmap_lines.geojson.gz')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 1 addition & 1 deletion ca_transit_speed_maps/speedmap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def read_segments_shn(organization_source_record_id: str) -> (gpd.GeoDataFrame,
path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{update_vars_index.ANALYSIS_DATE}.parquet'
# path = f'{catalog.stop_segments.dir}{catalog.stop_segments.route_dir_single_segment_detail}_{update_vars_index.ANALYSIS_DATE}.parquet'
speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', '==', organization_source_record_id]]) # aggregated
assert (speedmap_segs >> select(-_.route_short_name)).isna().any().any() == False, 'no cols besides route_short_name should be nan'x
assert (speedmap_segs >> select(-_.route_short_name)).isna().any().any() == False, 'no cols besides route_short_name should be nan'
speedmap_segs = prepare_segment_gdf(speedmap_segs)
shn = gpd.read_parquet(rt_utils.SHN_PATH)
this_shn = shn >> filter(_.District.isin([int(x[:2]) for x in speedmap_segs.caltrans_district.unique()]))
Expand Down
21 changes: 21 additions & 0 deletions rt_segment_speeds/logs/avg_speeds.log
Original file line number Diff line number Diff line change
Expand Up @@ -557,3 +557,24 @@
2024-11-15 16:47:26.586 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-11-13'] execution time: 0:04:17.582380
2024-11-15 16:47:58.691 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:15.263016
2024-11-15 16:48:12.028 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-11-13'] execution time: 0:00:28.600051
2024-11-25 15:13:37.373 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-02-14'] execution time: 0:03:26.605094
2024-11-25 15:17:40.046 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-02-14'] execution time: 0:04:02.591857
2024-11-25 15:56:22.157 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:14.429525
2024-11-25 15:56:33.970 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-02-14'] execution time: 0:00:26.242649
2024-11-25 16:13:02.901 | INFO | average_segment_speeds:segment_averages_detail:247 - speedmap_segments detailed segment averaging for ['2024-02-14'] execution time: 0:05:49.602618
2024-11-25 16:18:14.085 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-02-14'] execution time: 0:05:11.036585
2024-11-25 16:22:17.639 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-02-14'] execution time: 0:04:03.406461
2024-11-25 17:36:36.926 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-04-17'] execution time: 0:03:37.971380
2024-11-25 17:40:46.424 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-04-17'] execution time: 0:04:09.371271
2024-11-25 18:21:02.631 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:15.281556
2024-11-25 18:21:14.991 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-04-17'] execution time: 0:00:27.640841
2024-11-25 18:38:30.685 | INFO | average_segment_speeds:segment_averages_detail:247 - speedmap_segments detailed segment averaging for ['2024-04-17'] execution time: 0:05:56.867437
2024-11-25 18:43:43.127 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-04-17'] execution time: 0:05:12.297407
2024-11-25 18:47:46.966 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-04-17'] execution time: 0:04:03.631509
2024-11-25 20:16:48.107 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-07-17'] execution time: 0:03:01.260045
2024-11-25 20:20:35.395 | INFO | __main__:segment_averages:183 - stop_segments segment averaging for ['2024-07-17'] execution time: 0:03:47.132474
2024-11-25 20:58:10.113 | INFO | __main__:summary_average_speeds:120 - trip avg 0:00:14.389431
2024-11-25 20:58:21.747 | INFO | __main__:summary_average_speeds:155 - rt_stop_times summary speed averaging for ['2024-07-17'] execution time: 0:00:26.022796
2024-11-25 21:14:33.232 | INFO | average_segment_speeds:segment_averages_detail:247 - speedmap_segments detailed segment averaging for ['2024-07-17'] execution time: 0:05:37.386735
2024-11-25 21:19:21.360 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-07-17'] execution time: 0:04:47.990211
2024-11-25 21:23:05.965 | INFO | average_segment_speeds:segment_averages:183 - speedmap_segments segment averaging for ['2024-07-17'] execution time: 0:03:44.478500
6 changes: 6 additions & 0 deletions rt_segment_speeds/logs/cut_stop_segments.log
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@
2024-11-15 12:05:36.298 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-11-13: 0:05:14.586933
2024-11-25 11:51:31.638 | INFO | __main__:<module>:155 - cut segments 2024-01-17: 0:21:23.110335
2024-11-25 11:57:30.752 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-01-17: 0:04:25.925486
2024-11-25 14:26:37.567 | INFO | __main__:<module>:155 - cut segments 2024-02-14: 0:20:14.982057
2024-11-25 14:32:07.865 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-02-14: 0:04:06.440641
2024-11-25 16:46:11.154 | INFO | __main__:<module>:155 - cut segments 2024-04-17: 0:21:24.813915
2024-11-25 16:52:05.319 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-04-17: 0:04:28.728100
2024-11-25 19:29:33.128 | INFO | __main__:<module>:155 - cut segments 2024-07-17: 0:19:53.239259
2024-11-25 19:35:15.706 | INFO | __main__:<module>:244 - speedmap segments and proxy_stop_times 2024-07-17: 0:04:18.392759
9 changes: 9 additions & 0 deletions rt_segment_speeds/logs/interpolate_stop_arrival.log
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,12 @@
2024-11-15 12:44:55.085 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-11-13: 2024-11-13: 0:15:15.789486
2024-11-15 13:34:38.198 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-11-13: 2024-11-13: 0:15:41.682831
2024-11-15 13:46:49.459 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-11-13: 2024-11-13: 0:03:04.062272
2024-11-25 15:08:19.483 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-02-14: 2024-02-14: 0:14:15.053152
2024-11-25 15:54:03.502 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-02-14: 2024-02-14: 0:14:19.447104
2024-11-25 16:04:58.835 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-02-14: 2024-02-14: 0:02:41.448366
2024-11-25 17:31:02.567 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-04-17: 2024-04-17: 0:15:55.554641
2024-11-25 18:18:13.220 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-04-17: 2024-04-17: 0:14:30.258514
2024-11-25 18:30:15.050 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-04-17: 2024-04-17: 0:02:50.595931
2024-11-25 20:11:30.072 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for stop_segments 2024-07-17: 2024-07-17: 0:14:23.629904
2024-11-25 20:55:50.314 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for rt_stop_times 2024-07-17: 2024-07-17: 0:13:51.485049
2024-11-25 21:06:50.404 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:279 - interpolate arrivals for speedmap_segments 2024-07-17: 2024-07-17: 0:02:33.588358
18 changes: 18 additions & 0 deletions rt_segment_speeds/logs/nearest_vp.log
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,21 @@
2024-11-15 13:18:56.444 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-11-13: 0:10:40.446083
2024-11-15 13:40:07.685 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-11-13: 0:02:36.767174
2024-11-15 13:43:45.362 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-11-13: 0:03:37.215365
2024-11-25 14:44:38.184 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-02-14: 0:12:12.259095
2024-11-25 14:54:04.369 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for stop_segments 2024-02-14: 0:09:24.152632
2024-11-25 15:29:52.547 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-02-14: 0:11:56.560691
2024-11-25 15:39:43.982 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-02-14: 0:09:49.647278
2024-11-25 15:59:02.856 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-02-14: 0:02:13.301548
2024-11-25 16:02:17.354 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-02-14: 0:03:14.096258
2024-11-25 17:04:55.099 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-04-17: 0:12:31.879883
2024-11-25 17:15:06.934 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for stop_segments 2024-04-17: 0:10:09.477922
2024-11-25 17:53:10.041 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-04-17: 0:12:06.825727
2024-11-25 18:03:42.892 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-04-17: 0:10:30.863101
2024-11-25 18:23:48.992 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-04-17: 0:02:18.365295
2024-11-25 18:27:24.420 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-04-17: 0:03:35.000398
2024-11-25 19:47:37.984 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for stop_segments 2024-07-17: 0:12:03.630595
2024-11-25 19:57:06.324 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for stop_segments 2024-07-17: 0:09:26.376710
2024-11-25 20:32:12.883 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for rt_stop_times 2024-07-17: 0:11:20.451706
2024-11-25 20:41:58.771 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for rt_stop_times 2024-07-17: 0:09:44.129455
2024-11-25 21:00:54.275 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:178 - nearest neighbor for speedmap_segments 2024-07-17: 0:02:16.592519
2024-11-25 21:04:16.776 | INFO | vp_around_stops:filter_to_nearest_two_vp:248 - nearest 2 vp for speedmap_segments 2024-07-17: 0:03:22.103136
Loading
Loading