-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
1,181 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
"""A notebook containing viable code for additional stop_id validation.""" | ||
# %% | ||
# IMPORTS | ||
from transport_performance.gtfs.validation import GtfsInstance | ||
from transport_performance.gtfs.gtfs_utils import _add_validation_row | ||
|
||
|
||
# %% | ||
# SAMPLE FUNCTION | ||
def validate_stops(gtfs: GtfsInstance) -> None:
    """Validate stop_ids across the stops and stop_times tables.

    Stops that are referenced as a ``parent_station`` by any row of the stops
    table are excluded from the stops side of the comparison. The remaining
    stop_ids are then compared with the stop_ids used in stop_times, and a
    warning is recorded (through ``_add_validation_row``) for each direction
    in which at least one stop_id is present in one table but not the other.
    No warning is recorded when the two sets of stop_ids agree.

    Parameters
    ----------
    gtfs : GtfsInstance
        The GTFS instance to validate

    Returns
    -------
    None

    """
    stops = gtfs.feed.stops
    stop_times = gtfs.feed.stop_times

    # determine which stops are parent stops and remove them. parent_station
    # is an optional GTFS field, so only filter when the column exists.
    if "parent_station" in stops.columns:
        parents = stops["parent_station"].unique()
        stops = stops[~stops["stop_id"].isin(parents)]

    # unique stop_ids from both tables
    stops_ids = stops["stop_id"].drop_duplicates()
    stop_t_ids = stop_times["stop_id"].drop_duplicates()

    # ids present on one side only. Counting these directly (rather than
    # calling .count() on a boolean mask, which counts every row) ensures a
    # warning is only raised when a mismatch actually exists.
    missing_from_stops = stop_t_ids[~stop_t_ids.isin(stops_ids)].to_numpy()
    missing_from_stop_times = stops_ids[
        ~stops_ids.isin(stop_t_ids)
    ].to_numpy()

    if len(missing_from_stops) > 0:
        impacted_rows = list(
            gtfs.feed.stop_times[
                gtfs.feed.stop_times.stop_id.isin(missing_from_stops)
            ].index
        )
        _add_validation_row(
            gtfs=gtfs,
            _type="warning",
            message="stop_id's exist in stop_times but not in stops",
            table="stop_times",
            rows=impacted_rows,
        )

    if len(missing_from_stop_times) > 0:
        impacted_rows = list(
            gtfs.feed.stops[
                gtfs.feed.stops.stop_id.isin(missing_from_stop_times)
            ].index
        )
        _add_validation_row(
            gtfs=gtfs,
            _type="warning",
            message="stop_id's exist in stops but not in stop_times",
            table="stops",
            rows=impacted_rows,
        )
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
"""A set of functions that clean the gtfs data.""" | ||
from typing import Union | ||
|
||
import numpy as np | ||
|
||
from transport_performance.utils.defence import _gtfs_defence, _check_list | ||
|
||
|
||
def drop_trips(gtfs, trip_id: Union[str, list, np.ndarray]) -> None:
    """Drop trip(s) from a GtfsInstance object.

    The matching records are removed from the feed's trips and stop_times
    tables, the feed is tidied with ``drop_zombies`` and the pre-processed
    trip summaries are rebuilt.

    Parameters
    ----------
    gtfs : GtfsInstance
        The GtfsInstance object to drop the trip(s) from
    trip_id : Union[str, list, np.ndarray]
        The trip ID(s) of the trip to be dropped from the gtfs data.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `trip_id` is not a str, list or np.ndarray.

    """
    # defences
    _gtfs_defence(gtfs, "gtfs")
    accepted_types = (str, list, np.ndarray)
    if not isinstance(trip_id, accepted_types):
        raise TypeError(
            f"'trip_id' received type: {type(trip_id)}. "
            "Expected types: [str, list, np.ndarray]"
        )

    # normalise the input to a list: wrap a lone string and unpack numpy
    # arrays, since _check_list only takes lists
    if isinstance(trip_id, str):
        ids_to_drop = [trip_id]
    elif isinstance(trip_id, np.ndarray):
        ids_to_drop = list(trip_id)
    else:
        ids_to_drop = trip_id

    # every trip id must be a string
    _check_list(
        ls=ids_to_drop,
        param_nm="trip_id",
        check_elements=True,
        exp_type=str,
    )

    # keep only the records that do not belong to the dropped trips
    trips_to_keep = ~gtfs.feed.trips["trip_id"].isin(ids_to_drop)
    gtfs.feed.trips = gtfs.feed.trips[trips_to_keep]

    stop_times_to_keep = ~gtfs.feed.stop_times["trip_id"].isin(ids_to_drop)
    gtfs.feed.stop_times = gtfs.feed.stop_times[stop_times_to_keep]

    # finish cleaning up deleted trips
    gtfs.feed = gtfs.feed.drop_zombies()

    # refresh the pre-processed tables so that summaries reflect the drop
    gtfs.pre_processed_trips = gtfs._preprocess_trips_and_routes()
    return None
|
||
|
||
def clean_consecutive_stop_fast_travel_warnings(
    gtfs, validate: bool = False
) -> None:
    """Clean 'Fast Travel Between Consecutive Stops' warnings from validity_df.

    Trips flagged by the first matching warning are dropped from the feed
    (through ``drop_trips``) and from ``gtfs.full_stop_schedule``. The
    warning row itself is not removed from ``gtfs.validity_df`` here. When no
    matching warning is present the gtfs is left unchanged.

    Parameters
    ----------
    gtfs : GtfsInstance
        The GtfsInstance to clean warnings within
    validate : bool, optional
        Whether or not to validate the gtfs before carrying out this cleaning
        operation

    Returns
    -------
    None

    Raises
    ------
    AttributeError
        If the gtfs has no `validity_df` attribute and `validate` is False.

    """
    # defences
    _gtfs_defence(gtfs, "gtfs")
    if not validate and "validity_df" not in gtfs.__dict__:
        raise AttributeError(
            "The gtfs has not been validated, therefore no "
            "warnings can be identified. You can pass "
            "validate=True to this function to validate the "
            "gtfs."
        )

    if validate:
        gtfs.is_valid()

    validity_df = gtfs.validity_df
    needed_warning = validity_df[
        validity_df["message"] == "Fast Travel Between Consecutive Stops"
    ].values

    if len(needed_warning) < 1:
        return None

    # the fourth column of the first matching validity row is used as the
    # full_stop_schedule index labels of the offending records
    # NOTE(review): presumably this is the 'rows' column - confirm
    trip_ids = gtfs.full_stop_schedule.loc[
        needed_warning[0][3]
    ].trip_id.unique()

    # drop trips from tables
    drop_trips(gtfs=gtfs, trip_id=trip_ids)
    gtfs.full_stop_schedule = gtfs.full_stop_schedule[
        ~gtfs.full_stop_schedule["trip_id"].isin(trip_ids)
    ]
    return None
|
||
|
||
def clean_multiple_stop_fast_travel_warnings(
    gtfs, validate: bool = False
) -> None:
    """Clean 'Fast Travel Over Multiple Stops' warnings from validity_df.

    Trips flagged by the first matching warning are dropped from the feed
    (through ``drop_trips``) and from ``gtfs.multiple_stops_invalid``. The
    warning row itself is not removed from ``gtfs.validity_df`` here. When no
    matching warning is present the gtfs is left unchanged.

    Parameters
    ----------
    gtfs : GtfsInstance
        The GtfsInstance to clean warnings within
    validate : bool, optional
        Whether or not to validate the gtfs before carrying out this cleaning
        operation

    Returns
    -------
    None

    Raises
    ------
    AttributeError
        If the gtfs has no `validity_df` attribute and `validate` is False.

    """
    # defences
    _gtfs_defence(gtfs, "gtfs")
    if not validate and "validity_df" not in gtfs.__dict__:
        raise AttributeError(
            "The gtfs has not been validated, therefore no "
            "warnings can be identified. You can pass "
            "validate=True to this function to validate the "
            "gtfs."
        )

    if validate:
        gtfs.is_valid()

    validity_df = gtfs.validity_df
    needed_warning = validity_df[
        validity_df["message"] == "Fast Travel Over Multiple Stops"
    ].values

    if len(needed_warning) < 1:
        return None

    # the fourth column of the first matching validity row is used as the
    # multiple_stops_invalid index labels of the offending records
    # NOTE(review): presumably this is the 'rows' column - confirm
    trip_ids = gtfs.multiple_stops_invalid.loc[
        needed_warning[0][3]
    ].trip_id.unique()

    # drop trips from tables
    drop_trips(gtfs=gtfs, trip_id=trip_ids)
    gtfs.multiple_stops_invalid = gtfs.multiple_stops_invalid[
        ~gtfs.multiple_stops_invalid["trip_id"].isin(trip_ids)
    ]
    return None
Oops, something went wrong.