Skip to content

Commit

Permalink
186 filter to date method in gtfsinstance (#190)
Browse files Browse the repository at this point in the history
* feat: Optionally filter gtfs to date list

* refactor: Toml has no nonetype, so empty list used to specify no date filter

* test: Invalid datestring causes raise

* test: Raises if filter_dates values do not exist within gtfs calendar

* refactor: All in_pth values use gtfs fixture path constant

* test: Assert filtered net can build r5py network

* fix: Runinteg invocations in workflows needed fixing

* fix: Runner doesn't install osmosis so deselect all tests with osmosis deps

* feat: Changes on bristol run

* refactor: Defense checks & type hinting

* docs: Add docstring to internal

* chore: Move _validate_datestring to defence.py

* docs: Update docstring

---------

Co-authored-by: Richard Leyshon <[email protected]>
  • Loading branch information
r-leyshon and Richard Leyshon authored Nov 6, 2023
1 parent 8bddb14 commit e6a700b
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 33 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/all-os-tests.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This workflow will install OS dependencies and run a 'base' set of unit tests with Python 3.9
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Unit tests on macOS/Linux/Windows
name: Integration tests no osmosis

on:
push:
Expand All @@ -28,7 +28,4 @@ jobs:
pip install -r requirements.txt
- name: Run Integration Tests # run only those tests marked runinteg & with no osmosis deps
run: |
pytest -m runinteg --runinteg --deselect tests/osm/
- name: Test with pytest # run only tests with no osmosis deps
run: |
pytest --deselect tests/osm/
pytest --runinteg --deselect tests/osm/ --deselect tests/gtfs/test_gtfs_utils.py::TestBboxFilterGtfs::test_bbox_filter_gtfs_to_date_builds_network
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
shell: sh
- name: Run Integration Tests
run: |
pytest -m runinteg --runinteg # run only those tests marked runinteg
pytest --runinteg # run only those tests marked runinteg
- name: pre-commit
run: |
pre-commit install
Expand Down
37 changes: 17 additions & 20 deletions notebooks/e2e/config/e2e.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,12 @@ override = true
input_dir = "data/external/urban_centre/"
merged_path = "data/processed/urban_centre/uc_merged.tif"
bbox = [
-415194.5798256779,
6014542.675452075,
-178899.95729310974,
6239314.71054581
-415194.5798256779, 6014542.675452075, -178899.95729310974, 6239314.71054581
] # must in in 'ESRI:54009', this represents a BBOX around Wales
centre = [51.5773597, -2.9660008]
centre = [51.46737027594145, -2.5988505326255895]
buffer_size = 12000
write_outputs = true
output_map_path = "outputs/e2e/urban_centre/urban_centre_map.html"
output_map_path = "outputs/e2e/bristol/urban_centre/urban_centre_map.html"

[population] # configuration section for population
override = true
Expand All @@ -30,35 +27,35 @@ merged_path = "data/interim/population/pop_merged.tif"
merged_resampled_path = "data/processed/population/pop_merged_resampled.tif"
threshold = 1 # set small and positive, to remove 0 pop cells
write_outputs = true
output_map_path = "outputs/e2e/population/population_map.html"
output_map_path = "outputs/e2e/bristol/population/population_map.html"

[gtfs] # configuration section for gtfs
override = true
input_path = "data/external/gtfs/itm_wales_gtfs.zip"
filtered_path = "data/interim/gtfs/itm_wales_filtered_gtfs.zip"
input_path = "data/external/gtfs/itm_england_gtfs.zip"
filtered_path = "data/interim/gtfs/itm_england_filtered_gtfs.zip"
units = "km"
write_outputs = true
cleaned_path = "data/processed/gtfs/itm_wales_filtered_cleaned_gtfs.zip"
stops_map_path = "outputs/e2e/gtfs/stops_map.html"
hull_map_path = "outputs/e2e/gtfs/hull_map.html"
used_stops_map_path = "outputs/e2e/gtfs/used_stops_map.html"
cleaned_path = "data/processed/gtfs/itm_england_filtered_cleaned_gtfs.zip"
stops_map_path = "outputs/e2e/bristol/gtfs/stops_map.html"
hull_map_path = "outputs/e2e/bristol/gtfs/hull_map.html"
used_stops_map_path = "outputs/e2e/bristol/gtfs/used_stops_map.html"

[osm] # configuration section for osm clipping
override = true
input_path = "data/external/osm/wales-latest.osm.pbf"
filtered_path = "data/processed/osm/wales_latest_filtered.osm.pbf"
tag_filter = false
input_path = "data/external/osm/england-latest.osm.pbf"
filtered_path = "data/processed/osm/england_latest_filtered.osm.pbf"
tag_filter = true

[analyse_network] # configuration for the analyse_network stage
departure_year = 2023
departure_month = 8
departure_day = 8 # this is the date, not the day of the week
departure_month = 10
departure_day = 27 # this is the date, not the day of the week
departure_hour = 8
departure_minute = 0
departure_time_window = 1 # this is in hours
max_time = 45 # this is in minutes
write_outputs = true
outputs_dir = "outputs/e2e/analyse_network/"
outputs_dir = "outputs/e2e/bristol/analyse_network/"
qa_travel_times = false # set to true to qa results with `qa_path`
qa_path = "outputs/e2e/analyse_network/travel_times.pkl"
save_travel_times_for_qa = false # set to true to write to `save_qa_path`
Expand All @@ -68,4 +65,4 @@ save_qa_path = "outputs/e2e/analyse_network/travel_times.pkl"
cut_off_time = 45 # this is in minutes
cut_off_distance = 11250 # this is in meters
write_outputs = true
outputs_dir = "outputs/e2e/metrics/"
outputs_dir = "outputs/e2e/bristol/metrics/"
3 changes: 2 additions & 1 deletion notebooks/e2e/e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@
geoms="hull",
create_out_parent=True,
)
gtfs.feed = gk.miscellany.restrict_to_dates(gtfs.feed, ["20231027"])
gtfs.feed.write(here(gtfs_config["cleaned_path"]))

# %%
Expand Down Expand Up @@ -543,7 +544,7 @@ def plot(

# %%
# visualise for an ID within the UC
UC_ID = 4110
UC_ID = 6456
assert UC_ID in travel_times.to_id.unique()
snippet_id = travel_times[travel_times.to_id == UC_ID]
snippet_id = pop_gdf.merge(snippet_id, left_on="id", right_on="from_id")
Expand Down
20 changes: 20 additions & 0 deletions src/transport_performance/gtfs/gtfs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
_is_expected_filetype,
_check_iterable,
_type_defence,
_validate_datestring,
_enforce_file_extension,
)
from transport_performance.utils.constants import PKG_PATH
Expand All @@ -35,9 +36,12 @@ def bbox_filter_gtfs(
],
units: str = "km",
crs: str = "epsg:4326",
filter_dates: list = [],
) -> None:
"""Filter a GTFS feed to any routes intersecting with a bounding box.
Optionally filter to a list of given dates.
Parameters
----------
in_pth : Union[pathlib.Path, str], optional
Expand All @@ -54,6 +58,9 @@ def bbox_filter_gtfs(
crs : str, optional
What projection should the `bbox_list` be interpreted as. Defaults to
"epsg:4326" for lat long.
filter_dates: list, optional
A list of dates to restrict the feed to. Not providing filter_dates
means that date filtering will not be applied. Defaults to [].
Returns
-------
Expand All @@ -79,6 +86,7 @@ def bbox_filter_gtfs(
"crs": [crs, str],
"out_pth": [out_pth, (str, pathlib.Path)],
"in_pth": [in_pth, (str, pathlib.Path)],
"filter_dates": [filter_dates, list],
}
for k, v in typing_dict.items():
_type_defence(v[0], k, v[-1])
Expand All @@ -101,6 +109,18 @@ def bbox_filter_gtfs(

feed = gk.read_feed(in_pth, dist_units=units)
restricted_feed = gk.miscellany.restrict_to_area(feed=feed, area=bbox)
# optionally retrict to a date
if len(filter_dates) > 0:
_check_iterable(filter_dates, "filter_dates", list, exp_type=str)
# check date format is acceptable
[_validate_datestring(x) for x in filter_dates]
feed_dates = restricted_feed.get_dates()
diff = set(filter_dates).difference(feed_dates)
if diff:
raise ValueError(f"{diff} not present in feed dates.")
restricted_feed = gk.miscellany.restrict_to_dates(
restricted_feed, filter_dates
)
restricted_feed.write(out_pth)
print(f"Filtered feed written to {out_pth}.")

Expand Down
37 changes: 37 additions & 0 deletions src/transport_performance/utils/defence.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import pandas as pd
import warnings
from datetime import datetime


def _handle_path_like(
Expand Down Expand Up @@ -494,3 +495,39 @@ def _enforce_file_extension(
warnings.warn(msg, UserWarning)
path = os.path.normpath(root + f".{default_ext}")
return pathlib.Path(path)


def _validate_datestring(date_text: str, form: str = "%Y%m%d") -> None:
"""Ensure a date string conforms to the expected form.
Parameters
----------
date_text : str
The datestring to be checked.
form : str, optional
The expected date format. Must be a valid C-standard date code format
compatible with datetime. See https://docs.python.org/3/library/datetim
e.html#strftime-and-strptime-format-codes for examples. Defaults to
"%Y%m%d".
Returns
-------
None
Raises
------
ValueError
`date_text` is not in the expected `form`.
TypeError
`date_text` or `form` are not string.
"""
_type_defence(date_text, "date_text", str)
_type_defence(form, "form", str)
try:
datetime.strptime(date_text, form)
except ValueError:
raise ValueError(
f"Incorrect date format, {date_text} should be {form}"
)
return None
83 changes: 77 additions & 6 deletions tests/gtfs/test_gtfs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
_add_validation_row,
filter_gtfs_around_trip,
convert_pandas_to_plotly,
_validate_datestring,
)

# location of GTFS test fixture
Expand Down Expand Up @@ -45,9 +46,7 @@ def test_bbox_filter_gtfs_writes_with_bbox_list(self, bbox_list, tmpdir):
tmpdir, "newport-train-station-bboxlist_gtfs.zip"
)
bbox_filter_gtfs(
in_pth=os.path.join(
"tests", "data", "gtfs", "newport-20230613_gtfs.zip"
),
GTFS_FIX_PTH,
out_pth=pathlib.Path(tmp_out),
bbox=bbox_list,
)
Expand All @@ -71,9 +70,7 @@ def test_bbox_filter_gtfs_writes_with_bbox_gdf(self, bbox_list, tmpdir):
)

bbox_filter_gtfs(
in_pth=os.path.join(
"tests", "data", "gtfs", "newport-20230613_gtfs.zip"
),
in_pth=GTFS_FIX_PTH,
out_pth=pathlib.Path(tmp_out),
bbox=bbox_gdf,
)
Expand All @@ -87,6 +84,63 @@ def test_bbox_filter_gtfs_writes_with_bbox_gdf(self, bbox_list, tmpdir):
feed, GtfsInstance
), f"Expected class `Gtfs_Instance but found: {type(feed)}`"

def test_bbox_filter_gtfs_raises_date_not_in_gtfs(self, bbox_list, tmpdir):
"""Test raises if filter date is not found within the GTFS calendar."""
with pytest.raises(
ValueError, match="{'30000101'} not present in feed dates."
):
bbox_filter_gtfs(
in_pth=GTFS_FIX_PTH,
out_pth=os.path.join(tmpdir, "foobar.zip"),
bbox=bbox_list,
filter_dates=["30000101"],
)

def test_bbox_filter_gtfs_filters_to_date(self, bbox_list, tmpdir):
"""Test filtered GTFS behaves as expected."""
out_pth = os.path.join(tmpdir, "out.zip")
# filter to date of fixture ingest
bbox_filter_gtfs(
in_pth=GTFS_FIX_PTH,
out_pth=out_pth,
bbox=bbox_list,
filter_dates=["20230613"],
)
assert os.path.exists(
out_pth
), f"Expected filtered GTFS was not found at {out_pth}"
# compare dates
fix = GtfsInstance(GTFS_FIX_PTH)
fix_stops_count = len(fix.feed.stops)
filtered = GtfsInstance(out_pth)
filtered_stops_count = len(filtered.feed.stops)
assert (
fix_stops_count > filtered_stops_count
), f"Expected fewer than {fix_stops_count} in filtered GTFS but"
" found {filtered_stops_count}"

@pytest.mark.runinteg
def test_bbox_filter_gtfs_to_date_builds_network(self, bbox_list, tmpdir):
"""Having this flagged as integration test as Java dependency."""
# import goes here to avoid Java warnings as in test_setup.py
import r5py

out_pth = os.path.join(tmpdir, "out.zip")
# filter to date of fixture ingest
bbox_filter_gtfs(
in_pth=GTFS_FIX_PTH,
out_pth=out_pth,
bbox=bbox_list,
filter_dates=["20230613"],
)
net = r5py.TransportNetwork(
osm_pbf=os.path.join(
"tests", "data", "newport-2023-06-13.osm.pbf"
),
gtfs=[out_pth],
)
assert isinstance(net, r5py.TransportNetwork)


class Test_AddValidationRow(object):
"""Tests for _add_validation_row()."""
Expand Down Expand Up @@ -203,3 +257,20 @@ def test_convert_pandas_to_plotly_on_pass(self, test_df):
"Expected type plotly.graph_objects.Figure but "
f"{type(fig_return)} found"
)


class Test_ValidateDatestring(object):
"""Tests for _validate_datestring."""

def test_validate_datestring_raises(self):
"""Check incompatible datestrings raise."""
with pytest.raises(
ValueError,
match="Incorrect date format, 2023-10-23 should be %Y%m%d",
):
_validate_datestring("2023-10-23")

def test_validate_datestring_on_pass(self):
"""Test that func passes if datestring matches specified form."""
out = _validate_datestring("2023-10-23", form="%Y-%m-%d")
assert isinstance(out, type(None))

0 comments on commit e6a700b

Please sign in to comment.