Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exploring NTD Proposed Changes 25-26 #1317

Merged
merged 15 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 78 additions & 98 deletions gtfs_schedule/09_bus_stops_in_ca.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
{
"data": {
"text/plain": [
"array(['3', '4', '0', '2', '1', '0, 3', '5', '2, 3'], dtype=object)"
"array(['3', '2', '2, 3', '1', '4', '0', '0, 3', '5'], dtype=object)"
]
},
"metadata": {},
Expand Down Expand Up @@ -262,7 +262,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"(6981, 5)\n"
"(7026, 5)\n"
]
}
],
Expand Down Expand Up @@ -374,8 +374,8 @@
" <th>base64_url</th>\n",
" <th>gtfs_dataset_key</th>\n",
" <th>name</th>\n",
" <th>regional_feed_type</th>\n",
" <th>type</th>\n",
" <th>regional_feed_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -388,8 +388,8 @@
" <td>aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...</td>\n",
" <td>239f3baf3dd3b9e9464f66a777f9897d</td>\n",
" <td>SBMTD Schedule</td>\n",
" <td>None</td>\n",
" <td>schedule</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -405,11 +405,11 @@
" base64_url \\\n",
"40 aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm... \n",
"\n",
" gtfs_dataset_key name regional_feed_type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule None \n",
" gtfs_dataset_key name type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule schedule \n",
"\n",
" type \n",
"40 schedule "
" regional_feed_type \n",
"40 None "
]
},
"execution_count": 18,
Expand Down Expand Up @@ -576,8 +576,8 @@
" <th>base64_url</th>\n",
" <th>gtfs_dataset_key</th>\n",
" <th>name</th>\n",
" <th>regional_feed_type</th>\n",
" <th>type</th>\n",
" <th>regional_feed_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -590,8 +590,8 @@
" <td>aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm...</td>\n",
" <td>239f3baf3dd3b9e9464f66a777f9897d</td>\n",
" <td>SBMTD Schedule</td>\n",
" <td>None</td>\n",
" <td>schedule</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
Expand All @@ -607,11 +607,11 @@
" base64_url \\\n",
"40 aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZm... \n",
"\n",
" gtfs_dataset_key name regional_feed_type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule None \n",
" gtfs_dataset_key name type \\\n",
"40 239f3baf3dd3b9e9464f66a777f9897d SBMTD Schedule schedule \n",
"\n",
" type \n",
"40 schedule "
" regional_feed_type \n",
"40 None "
]
},
"execution_count": 22,
Expand Down Expand Up @@ -1024,44 +1024,44 @@
"output_type": "stream",
"text": [
"<class 'geopandas.geodataframe.GeoDataFrame'>\n",
"Int64Index: 14610 entries, 29 to 69071\n",
"Int64Index: 28088 entries, 3 to 69071\n",
"Data columns (total 16 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 org_id 14610 non-null object \n",
" 1 agency 14610 non-null object \n",
" 2 stop_id 14610 non-null object \n",
" 3 stop_name 14610 non-null object \n",
" 4 n_routes 13989 non-null float64 \n",
" 5 route_ids_served 13989 non-null object \n",
" 6 routetypes 13989 non-null object \n",
" 7 n_arrivals 13989 non-null float64 \n",
" 8 n_hours_in_service 13989 non-null float64 \n",
" 9 base64_url 14610 non-null object \n",
" 10 geometry 14610 non-null geometry\n",
" 11 date 14610 non-null object \n",
" 12 Route 14610 non-null int64 \n",
" 13 RouteType 14610 non-null object \n",
" 0 org_id 28088 non-null object \n",
" 1 agency 28088 non-null object \n",
" 2 stop_id 28088 non-null object \n",
" 3 stop_name 28088 non-null object \n",
" 4 n_routes 27467 non-null float64 \n",
" 5 route_ids_served 27467 non-null object \n",
" 6 routetypes 27467 non-null object \n",
" 7 n_arrivals 27467 non-null float64 \n",
" 8 n_hours_in_service 27467 non-null float64 \n",
" 9 base64_url 28088 non-null object \n",
" 10 geometry 28088 non-null geometry\n",
" 11 date 28088 non-null object \n",
" 12 Route 28088 non-null int64 \n",
" 13 RouteType 28088 non-null object \n",
" 14 route_id 621 non-null object \n",
" 15 route_type 621 non-null object \n",
"dtypes: float64(3), geometry(1), int64(1), object(11)\n",
"memory usage: 1.9+ MB\n",
"memory usage: 3.6+ MB\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 7216 entries, 0 to 7215\n",
"RangeIndex: 7261 entries, 0 to 7260\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 agency 7216 non-null object \n",
" 1 stop_id 7216 non-null object \n",
" 2 stop_name 7216 non-null object \n",
" 3 shn_route_type 7216 non-null object \n",
" 4 date 7216 non-null object \n",
" 5 shn_route 7216 non-null int64 \n",
" 6 routetypes 7044 non-null object \n",
" 7 x 7216 non-null float64\n",
" 8 y 7216 non-null float64\n",
" 0 agency 7261 non-null object \n",
" 1 stop_id 7261 non-null object \n",
" 2 stop_name 7261 non-null object \n",
" 3 shn_route_type 7261 non-null object \n",
" 4 date 7261 non-null object \n",
" 5 shn_route 7261 non-null int64 \n",
" 6 routetypes 7089 non-null object \n",
" 7 x 7261 non-null float64\n",
" 8 y 7261 non-null float64\n",
"dtypes: float64(2), int64(1), object(6)\n",
"memory usage: 507.5+ KB\n"
"memory usage: 510.7+ KB\n"
]
},
{
Expand Down Expand Up @@ -1097,7 +1097,7 @@
"metadata": {},
"outputs": [],
"source": [
"stops_for_export.to_csv(\"ca_stops_revised.csv\", index=False)"
"stops_for_export.to_csv(f\"{SCHED_GCS}ca_stops_revised.csv\", index=False)"
]
},
{
Expand Down Expand Up @@ -1230,7 +1230,7 @@
"outputs": [
{
"ename": "KeyError",
"evalue": "'Palo Verde Valley Transit Agency'",
"evalue": "'Butte County Association of Governments'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
Expand All @@ -1241,7 +1241,7 @@
"File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/pandas/core/apply.py:1174\u001b[0m, in \u001b[0;36mSeriesApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1172\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1173\u001b[0m values \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mobject\u001b[39m)\u001b[38;5;241m.\u001b[39m_values\n\u001b[0;32m-> 1174\u001b[0m mapped \u001b[38;5;241m=\u001b[39m \u001b[43mlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap_infer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1175\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1176\u001b[0m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1177\u001b[0m \u001b[43m \u001b[49m\u001b[43mconvert\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconvert_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1178\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1180\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(mapped) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(mapped[\u001b[38;5;241m0\u001b[39m], ABCSeries):\n\u001b[1;32m 1181\u001b[0m \u001b[38;5;66;03m# GH#43986 Need to do list(mapped) in order to get treated as nested\u001b[39;00m\n\u001b[1;32m 1182\u001b[0m \u001b[38;5;66;03m# See also GH#25959 regarding EA support\u001b[39;00m\n\u001b[1;32m 1183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m_constructor_expanddim(\u001b[38;5;28mlist\u001b[39m(mapped), index\u001b[38;5;241m=\u001b[39mobj\u001b[38;5;241m.\u001b[39mindex)\n",
"File \u001b[0;32m/opt/conda/lib/python3.9/site-packages/pandas/_libs/lib.pyx:2924\u001b[0m, in \u001b[0;36mpandas._libs.lib.map_infer\u001b[0;34m()\u001b[0m\n",
"Cell \u001b[0;32mIn[52], line 1\u001b[0m, in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[0;32m----> 1\u001b[0m to_map[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolor\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m to_map\u001b[38;5;241m.\u001b[39magency\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m x: hex_to_rgb(\u001b[43mcolor_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[43mx\u001b[49m\u001b[43m]\u001b[49m[\u001b[38;5;241m1\u001b[39m:]))\n",
"\u001b[0;31mKeyError\u001b[0m: 'Palo Verde Valley Transit Agency'"
"\u001b[0;31mKeyError\u001b[0m: 'Butte County Association of Governments'"
]
}
],
Expand All @@ -1259,33 +1259,13 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:912: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"\n",
" centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to calitp-map-tiles/shs_stops/shs.geojson.gz\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:892: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
" centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n",
"/home/jovyan/data-analyses/_shared_utils/shared_utils/rt_utils.py:912: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n",
"\n",
" centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"writing to calitp-map-tiles/shs_stops/stops4.geojson.gz\n"
]
}
],
"source": [
Expand Down Expand Up @@ -1315,9 +1295,9 @@
" {'name': 'SHS with Stops Sep 2024',\n",
" 'url': 'https://storage.googleapis.com/calitp-map-tiles/shs_stops/stops4.geojson.gz',\n",
" 'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}}],\n",
" 'lat_lon': (36.067266532548935, -119.93886599913799),\n",
" 'lat_lon': (36.10085893610959, -119.97596894016574),\n",
" 'zoom': 13},\n",
" 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjA2NzI2NjUzMjU0ODkzNSwgLTExOS45Mzg4NjU5OTkxMzc5OV0sICJ6b29tIjogMTN9'}"
" 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJNYXAiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3Noc19zdG9wcy9zaHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAic3RhdGVfaGlnaHdheV9uZXR3b3JrIn0sIHsibmFtZSI6ICJTSFMgd2l0aCBTdG9wcyBTZXAgMjAyNCIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc2hzX3N0b3BzL3N0b3BzNC5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9fV0sICJsYXRfbG9uIjogWzM2LjEwMDg1ODkzNjEwOTU5LCAtMTE5Ljk3NTk2ODk0MDE2NTc0XSwgInpvb20iOiAxM30='}"
]
},
"execution_count": 54,
Expand Down Expand Up @@ -1415,20 +1395,20 @@
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 165 entries, 0 to 164\n",
"Int64Index: 166 entries, 0 to 165\n",
"Data columns (total 6 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 organization_key 165 non-null object\n",
" 1 organization_name 165 non-null object\n",
" 2 service_key 165 non-null object\n",
" 3 service_name 165 non-null object\n",
" 4 funding_program_key 165 non-null object\n",
" 5 funding_program_name 165 non-null object\n",
" 0 organization_key 166 non-null object\n",
" 1 organization_name 166 non-null object\n",
" 2 service_key 166 non-null object\n",
" 3 service_name 166 non-null object\n",
" 4 funding_program_key 166 non-null object\n",
" 5 funding_program_name 166 non-null object\n",
"dtypes: object(6)\n",
"memory usage: 9.0+ KB\n",
"memory usage: 9.1+ KB\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 90 entries, 0 to 163\n",
"Int64Index: 90 entries, 0 to 164\n",
"Data columns (total 2 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
Expand Down Expand Up @@ -1532,30 +1512,30 @@
"output_type": "stream",
"text": [
"<class 'geopandas.geodataframe.GeoDataFrame'>\n",
"RangeIndex: 3390 entries, 0 to 3389\n",
"RangeIndex: 3470 entries, 0 to 3469\n",
"Data columns (total 18 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 org_id 3390 non-null object \n",
" 1 agency 3390 non-null object \n",
" 2 stop_id 3390 non-null object \n",
" 3 stop_name 3390 non-null object \n",
" 4 n_routes 3309 non-null float64 \n",
" 5 route_ids_served 3309 non-null object \n",
" 6 routetypes 3309 non-null object \n",
" 7 n_arrivals 3309 non-null float64 \n",
" 8 n_hours_in_service 3309 non-null float64 \n",
" 9 base64_url 3390 non-null object \n",
" 10 geometry 3390 non-null geometry\n",
" 11 date 3390 non-null object \n",
" 12 shn_route 3390 non-null int64 \n",
" 13 shn_route_type 3390 non-null object \n",
" 0 org_id 3470 non-null object \n",
" 1 agency 3470 non-null object \n",
" 2 stop_id 3470 non-null object \n",
" 3 stop_name 3470 non-null object \n",
" 4 n_routes 3389 non-null float64 \n",
" 5 route_ids_served 3389 non-null object \n",
" 6 routetypes 3389 non-null object \n",
" 7 n_arrivals 3389 non-null float64 \n",
" 8 n_hours_in_service 3389 non-null float64 \n",
" 9 base64_url 3470 non-null object \n",
" 10 geometry 3470 non-null geometry\n",
" 11 date 3470 non-null object \n",
" 12 shn_route 3470 non-null int64 \n",
" 13 shn_route_type 3470 non-null object \n",
" 14 route_id 81 non-null object \n",
" 15 route_type 81 non-null object \n",
" 16 organization_key 3390 non-null object \n",
" 17 5311_agency 3390 non-null bool \n",
" 16 organization_key 3470 non-null object \n",
" 17 5311_agency 3470 non-null bool \n",
"dtypes: bool(1), float64(3), geometry(1), int64(1), object(12)\n",
"memory usage: 453.7+ KB\n",
"memory usage: 464.4+ KB\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 69 entries, 0 to 68\n",
"Data columns (total 3 columns):\n",
Expand Down Expand Up @@ -1605,10 +1585,10 @@
"outputs": [],
"source": [
"# exporting gdf w/ 5311 flag to csv\n",
"gdf_merge.to_csv(\"ca_stops_revised_5311.csv\", index=False)\n",
"gdf_merge.to_csv(f\"{SCHED_GCS}ca_stops_revised_5311.csv\", index=False)\n",
"\n",
"# exporting gdf_add to csv\n",
"gdf_agg.to_csv(\"ca_stops_revised_5311_agencies.csv\", index=False)"
"gdf_agg.to_csv(f\"{SCHED_GCS}ca_stops_revised_5311_agencies.csv\", index=False)"
]
},
{
Expand Down
Loading
Loading