Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

186 filter to date method in gtfsinstance #190

Merged
merged 14 commits into from
Nov 6, 2023
Merged
7 changes: 2 additions & 5 deletions .github/workflows/all-os-tests.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This workflow will install OS dependencies and run a 'base' set of unit tests with Python 3.9
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Unit tests on macOS/Linux/Windows
name: Integration tests no osmosis

on:
push:
Expand All @@ -28,7 +28,4 @@ jobs:
pip install -r requirements.txt
- name: Run Integration Tests # run only those tests marked runinteg & with no osmosis deps
run: |
pytest -m runinteg --runinteg --deselect tests/osm/
- name: Test with pytest # run only tests with no osmosis deps
run: |
pytest --deselect tests/osm/
pytest --runinteg --deselect tests/osm/ --deselect tests/gtfs/test_gtfs_utils.py::TestBboxFilterGtfs::test_bbox_filter_gtfs_to_date_builds_network
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
shell: sh
- name: Run Integration Tests
run: |
pytest -m runinteg --runinteg # run only those tests marked runinteg
pytest --runinteg # run all tests, including those marked runinteg
- name: pre-commit
run: |
pre-commit install
Expand Down
29 changes: 29 additions & 0 deletions src/transport_performance/gtfs/gtfs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Union
import pathlib
from geopandas import GeoDataFrame
from datetime import datetime

from transport_performance.utils.defence import (
_is_expected_filetype,
Expand All @@ -19,6 +20,15 @@
from transport_performance.utils.constants import PKG_PATH


def _validate_datestring(date_text, form="%Y%m%d"):
try:
datetime.strptime(date_text, form)
except ValueError:
raise ValueError(
f"Incorrect date format, {date_text} should be {form}"
)


r-leyshon marked this conversation as resolved.
Show resolved Hide resolved
def bbox_filter_gtfs(
r-leyshon marked this conversation as resolved.
Show resolved Hide resolved
in_pth: Union[pathlib.Path, str] = (
os.path.join(PKG_PATH, "data", "gtfs", "newport-20230613_gtfs.zip"),
Expand All @@ -34,9 +44,12 @@ def bbox_filter_gtfs(
],
units: str = "km",
crs: str = "epsg:4326",
filter_dates: list = [],
) -> None:
"""Filter a GTFS feed to any routes intersecting with a bounding box.

Optionally filter to a list of given dates.

Parameters
----------
in_pth : Union[pathlib.Path, str], optional
Expand All @@ -53,6 +66,9 @@ def bbox_filter_gtfs(
crs : str, optional
What projection should the `bbox_list` be interpreted as. Defaults to
"epsg:4326" for lat long.
filter_dates : list, optional
A list of date strings in YYYYMMDD format to restrict the feed to.
Defaults to [], meaning that no date filter will be applied.
r-leyshon marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Expand All @@ -78,6 +94,7 @@ def bbox_filter_gtfs(
"crs": [crs, str],
"out_pth": [out_pth, (str, pathlib.Path)],
"in_pth": [in_pth, (str, pathlib.Path)],
"filter_dates": [filter_dates, list],
}
for k, v in typing_dict.items():
_type_defence(v[0], k, v[-1])
Expand All @@ -102,6 +119,18 @@ def bbox_filter_gtfs(

feed = gk.read_feed(in_pth, dist_units=units)
restricted_feed = gk.miscellany.restrict_to_area(feed=feed, area=bbox)
# optionally restrict to the given dates
if len(filter_dates) > 0:
_check_iterable(filter_dates, "filter_dates", list, exp_type=str)
# check date format is acceptable
[_validate_datestring(x) for x in filter_dates]
feed_dates = restricted_feed.get_dates()
diff = set(filter_dates).difference(feed_dates)
if diff:
raise ValueError(f"{diff} not present in feed dates.")
restricted_feed = gk.miscellany.restrict_to_dates(
restricted_feed, filter_dates
)
restricted_feed.write(out_pth)
print(f"Filtered feed written to {out_pth}.")

Expand Down
83 changes: 77 additions & 6 deletions tests/gtfs/test_gtfs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
_add_validation_row,
filter_gtfs_around_trip,
convert_pandas_to_plotly,
_validate_datestring,
)

# location of GTFS test fixture
Expand Down Expand Up @@ -45,9 +46,7 @@ def test_bbox_filter_gtfs_writes_with_bbox_list(self, bbox_list, tmpdir):
tmpdir, "newport-train-station-bboxlist_gtfs.zip"
)
bbox_filter_gtfs(
in_pth=os.path.join(
"tests", "data", "gtfs", "newport-20230613_gtfs.zip"
),
GTFS_FIX_PTH,
out_pth=pathlib.Path(tmp_out),
bbox=bbox_list,
)
Expand All @@ -71,9 +70,7 @@ def test_bbox_filter_gtfs_writes_with_bbox_gdf(self, bbox_list, tmpdir):
)

bbox_filter_gtfs(
in_pth=os.path.join(
"tests", "data", "gtfs", "newport-20230613_gtfs.zip"
),
in_pth=GTFS_FIX_PTH,
out_pth=pathlib.Path(tmp_out),
bbox=bbox_gdf,
)
Expand All @@ -87,6 +84,63 @@ def test_bbox_filter_gtfs_writes_with_bbox_gdf(self, bbox_list, tmpdir):
feed, GtfsInstance
), f"Expected class `Gtfs_Instance but found: {type(feed)}`"

def test_bbox_filter_gtfs_raises_date_not_in_gtfs(self, bbox_list, tmpdir):
    """Check a ValueError is raised for a date absent from the feed."""
    # the output path is never written to, as the call is expected to raise
    unused_out = os.path.join(tmpdir, "foobar.zip")
    missing_dates = ["30000101"]
    expected_msg = "{'30000101'} not present in feed dates."
    with pytest.raises(ValueError, match=expected_msg):
        bbox_filter_gtfs(
            in_pth=GTFS_FIX_PTH,
            out_pth=unused_out,
            bbox=bbox_list,
            filter_dates=missing_dates,
        )

def test_bbox_filter_gtfs_filters_to_date(self, bbox_list, tmpdir):
    """Test filtered GTFS behaves as expected.

    Filters the fixture feed to the bounding box and a single date, then
    checks that the output file exists and holds fewer stops than the
    unfiltered fixture.
    """
    out_pth = os.path.join(tmpdir, "out.zip")
    # filter to date of fixture ingest
    bbox_filter_gtfs(
        in_pth=GTFS_FIX_PTH,
        out_pth=out_pth,
        bbox=bbox_list,
        filter_dates=["20230613"],
    )
    assert os.path.exists(
        out_pth
    ), f"Expected filtered GTFS was not found at {out_pth}"
    # compare stop counts between the fixture and the filtered feed
    fix = GtfsInstance(GTFS_FIX_PTH)
    fix_stops_count = len(fix.feed.stops)
    filtered = GtfsInstance(out_pth)
    filtered_stops_count = len(filtered.feed.stops)
    # keep both message parts inside one parenthesised expression, each an
    # f-string, so the whole message is attached to the assert and both
    # counts are interpolated
    assert fix_stops_count > filtered_stops_count, (
        f"Expected fewer than {fix_stops_count} in filtered GTFS but"
        f" found {filtered_stops_count}"
    )

@pytest.mark.runinteg
def test_bbox_filter_gtfs_to_date_builds_network(self, bbox_list, tmpdir):
    """Check a date-filtered feed can be used to build an r5py network.

    Marked as an integration test because r5py requires Java.
    """
    # deferred import avoids Java warnings, as in test_setup.py
    import r5py

    filtered_gtfs = os.path.join(tmpdir, "out.zip")
    # restrict the fixture to its ingest date
    bbox_filter_gtfs(
        in_pth=GTFS_FIX_PTH,
        out_pth=filtered_gtfs,
        bbox=bbox_list,
        filter_dates=["20230613"],
    )
    osm_fixture = os.path.join("tests", "data", "newport-2023-06-13.osm.pbf")
    net = r5py.TransportNetwork(osm_pbf=osm_fixture, gtfs=[filtered_gtfs])
    assert isinstance(net, r5py.TransportNetwork)


class Test_AddValidationRow(object):
"""Tests for _add_validation_row()."""
Expand Down Expand Up @@ -203,3 +257,20 @@ def test_convert_pandas_to_plotly_on_pass(self, test_df):
"Expected type plotly.graph_objects.Figure but "
f"{type(fig_return)} found"
)


class Test_ValidateDatestring(object):
    """Unit tests covering the _validate_datestring helper."""

    def test_validate_datestring_raises(self):
        """A datestring not matching the default format should raise."""
        expected_msg = "Incorrect date format, 2023-10-23 should be %Y%m%d"
        with pytest.raises(ValueError, match=expected_msg):
            _validate_datestring("2023-10-23")

    def test_validate_datestring_on_pass(self):
        """A datestring matching the supplied format returns None."""
        result = _validate_datestring("2023-10-23", form="%Y-%m-%d")
        assert result is None