From de3c736c284c83cdcde943f77d9248338dd66599 Mon Sep 17 00:00:00 2001 From: Browning Date: Fri, 1 Sep 2023 11:42:15 +0100 Subject: [PATCH 1/2] Update defences and type checking for summ_ops; update tests --- src/transport_performance/gtfs/validation.py | 53 ++++----------- src/transport_performance/utils/defence.py | 4 +- tests/gtfs/test_validation.py | 69 ++++++++------------ 3 files changed, 40 insertions(+), 86 deletions(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 12da36d7..b58be5bf 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -5,14 +5,12 @@ import geopandas as gpd import folium import datetime -import numpy as np import os -import inspect from transport_performance.gtfs.routes import scrape_route_type_lookup from transport_performance.utils.defence import ( _is_expected_filetype, - _check_namespace_export, + _check_list, _check_parent_dir_exists, ) @@ -387,7 +385,7 @@ def _get_pre_processed_trips(self): def _summary_defence( self, - summ_ops: list = [np.min, np.max, np.mean, np.median], + summ_ops: list = ["min", "max", "mean", "median"], return_summary: bool = True, ) -> None: """Check for any invalid parameters in a summarising function. @@ -395,8 +393,8 @@ def _summary_defence( Parameters ---------- summ_ops : list, optional - A list of operators used to get a summary of a given day, - by default [np.min, np.max, np.mean, np.median] + A list of operators in string format used to get a summary of a + given day, by default ["min", "max", "mean", "median"] return_summary : bool, optional When True, a summary is returned. When False, route data for each date is returned, @@ -407,44 +405,16 @@ def _summary_defence( None """ + _check_list(ls=summ_ops, param_nm="summ_ops") if not isinstance(return_summary, bool): raise TypeError( "'return_summary' must be of type boolean." f" Found {type(return_summary)} : {return_summary}" ) - # summ_ops defence - - if isinstance(summ_ops, list): - for i in summ_ops: - # updated for numpy >= 1.25.0, this check rules out cases - # that are not functions - if inspect.isfunction(i) or type(i).__module__ == "numpy": - if not _check_namespace_export(pkg=np, func=i): - raise TypeError( - "Each item in `summ_ops` must be a numpy function." - f" Found {type(i)} : {i.__name__}" - ) - else: - raise TypeError( - ( - "Each item in `summ_ops` must be a function." - f" Found {type(i)} : {i}" - ) - ) - elif inspect.isfunction(summ_ops): - if not _check_namespace_export(pkg=np, func=summ_ops): - raise NotImplementedError( - "`summ_ops` expects numpy functions only." - ) - else: - raise TypeError( - "`summ_ops` expects a numpy function or list of numpy" - f" functions. Found {type(summ_ops)}" - ) def summarise_trips( self, - summ_ops: list = [np.min, np.max, np.mean, np.median], + summ_ops: list = ["min", "max", "mean", "median"], return_summary: bool = True, ) -> pd.DataFrame: """Produce a summarised table of trip statistics by day of week. @@ -457,8 +427,8 @@ def summarise_trips( Parameters ---------- summ_ops : list, optional - A list of operators used to get a summary of a given day, - by default [np.min, np.max, np.mean, np.median] + A list of operators in string format used to get a summary of a + given day, by default ["min", "max", "mean", "median"] return_summary : bool, optional When True, a summary is returned. When False, trip data for each date is returned, @@ -496,7 +466,6 @@ def summarise_trips( day_trip_counts.reset_index(inplace=True) day_trip_counts = day_trip_counts.round(0) - # order the days (for plotting future purposes) # order the days (for plotting future purposes) day_trip_counts = self._order_dataframe_by_day(df=day_trip_counts) day_trip_counts.reset_index(drop=True, inplace=True) @@ -505,7 +474,7 @@ def summarise_trips( def summarise_routes( self, - summ_ops: list = [np.min, np.max, np.mean, np.median], + summ_ops: list = ["min", "max", "mean", "median"], return_summary: bool = True, ) -> pd.DataFrame: """Produce a summarised table of route statistics by day of week. @@ -519,8 +488,8 @@ def summarise_routes( Parameters ---------- summ_ops : list, optional - A list of operators used to get a summary of a given day, - by default [np.min, np.max, np.mean, np.median] + A list of operators in string format used to get a summary of a + given day, by default ["min", "max", "mean", "median"] return_summary : bool, optional When True, a summary is returned. When False, route data for each date is returned, diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index 5d60d06f..5be3d0f3 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -202,8 +202,8 @@ def _check_list(ls, param_nm, check_elements=True, exp_type=str): if not isinstance(i, exp_type): raise TypeError( ( - f"`{param_nm}` must contain {str(exp_type)} only." - f" Found {type(i)} : {i}" + f"`{param_nm}` must contain {exp_type} only." + f" Found {type(i)}" ) ) diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index 5ebd7ee5..9cd85591 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -341,36 +341,29 @@ def test__preprocess_trips_and_routes(self, gtfs_fixture): def test_summarise_trips_defence(self, gtfs_fixture): """Defensive checks for summarise_trips().""" + """Defensive checks for summarise_routes().""" + # cases where a function is passed to summ_ops with pytest.raises( TypeError, - match="Each item in `summ_ops`.*. Found : np.mean", + match="`summ_ops` must contain only. Found ", ): - gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"]) - # case where is function but not exported from numpy - - def dummy_func(): - """Test case func.""" - return None + gtfs_fixture.summarise_trips(summ_ops=[np.mean]) + # cases where a list isn't passed to summ_ops with pytest.raises( TypeError, - match=( - "Each item in `summ_ops` must be a numpy function. Found" - " : dummy_func" - ), - ): - gtfs_fixture.summarise_trips(summ_ops=[np.min, dummy_func]) - # case where a single non-numpy func is being passed - with pytest.raises( - NotImplementedError, - match="`summ_ops` expects numpy functions only.", + match="`summ_ops` should be a list. Instead found ", ): - gtfs_fixture.summarise_trips(summ_ops=dummy_func) + gtfs_fixture.summarise_trips(summ_ops="tester") + + # cases where in item passed to summ_ops is an invalid operator with pytest.raises( - TypeError, - match="`summ_ops` expects a numpy function.*. Found ", + AttributeError, + match="'SeriesGroupBy' object has no attribute 'tester'", ): - gtfs_fixture.summarise_trips(summ_ops=38) + gtfs_fixture.summarise_trips(summ_ops=["tester"]) + # cases where return_summary are not of type boolean with pytest.raises( TypeError, @@ -387,36 +380,28 @@ def dummy_func(): def test_summarise_routes_defence(self, gtfs_fixture): """Defensive checks for summarise_routes().""" + # cases where a function is passed to summ_ops with pytest.raises( TypeError, - match="Each item in `summ_ops`.*. Found : np.mean", + match="`summ_ops` must contain only. Found ", ): - gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"]) - # case where is function but not exported from numpy - - def dummy_func(): - """Test case func.""" - return None + gtfs_fixture.summarise_routes(summ_ops=[np.mean]) + # cases where a list isn't passed to summ_ops with pytest.raises( TypeError, - match=( - "Each item in `summ_ops` must be a numpy function. Found" - " : dummy_func" - ), - ): - gtfs_fixture.summarise_routes(summ_ops=[np.min, dummy_func]) - # case where a single non-numpy func is being passed - with pytest.raises( - NotImplementedError, - match="`summ_ops` expects numpy functions only.", + match="`summ_ops` should be a list. Instead found ", ): - gtfs_fixture.summarise_routes(summ_ops=dummy_func) + gtfs_fixture.summarise_routes(summ_ops="tester") + + # cases where in item passed to summ_ops is an invalid operator with pytest.raises( - TypeError, - match="`summ_ops` expects a numpy function.*. Found ", + AttributeError, + match="'SeriesGroupBy' object has no attribute 'tester'", ): - gtfs_fixture.summarise_routes(summ_ops=38) + gtfs_fixture.summarise_routes(summ_ops=["tester"]) + # cases where return_summary are not of type boolean with pytest.raises( TypeError, From 65445912998020e2f9fd57afb3576bfffc607d48 Mon Sep 17 00:00:00 2001 From: Browning Date: Fri, 1 Sep 2023 11:59:45 +0100 Subject: [PATCH 2/2] fix: Update defency.py tests --- tests/utils/test_defence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_defence.py b/tests/utils/test_defence.py index 6bd4b992..98fc9ed0 100644 --- a/tests/utils/test_defence.py +++ b/tests/utils/test_defence.py @@ -28,7 +28,7 @@ def test__check_list_elements(self): TypeError, match=( "`mixed_list` must contain only. Found " - " : 2" + "" ), ): _check_list(