Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust groupby operations to align with Pandas release 2.1.0 #116

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 11 additions & 42 deletions src/transport_performance/gtfs/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
import geopandas as gpd
import folium
import datetime
import numpy as np
import os
import inspect

from transport_performance.gtfs.routes import scrape_route_type_lookup
from transport_performance.utils.defence import (
_is_expected_filetype,
_check_namespace_export,
_check_list,
_check_parent_dir_exists,
)

Expand Down Expand Up @@ -387,16 +385,16 @@ def _get_pre_processed_trips(self):

def _summary_defence(
self,
summ_ops: list = [np.min, np.max, np.mean, np.median],
summ_ops: list = ["min", "max", "mean", "median"],
return_summary: bool = True,
) -> None:
"""Check for any invalid parameters in a summarising function.

Parameters
----------
summ_ops : list, optional
A list of operators used to get a summary of a given day,
by default [np.min, np.max, np.mean, np.median]
A list of operators in string format used to get a summary of a
given day, by default ["min", "max", "mean", "median"]
return_summary : bool, optional
When True, a summary is returned. When False, route data
for each date is returned,
Expand All @@ -407,44 +405,16 @@ def _summary_defence(
None

"""
_check_list(ls=summ_ops, param_nm="summ_ops")
if not isinstance(return_summary, bool):
raise TypeError(
"'return_summary' must be of type boolean."
f" Found {type(return_summary)} : {return_summary}"
)
# summ_ops defence

if isinstance(summ_ops, list):
for i in summ_ops:
# updated for numpy >= 1.25.0, this check rules out cases
# that are not functions
if inspect.isfunction(i) or type(i).__module__ == "numpy":
if not _check_namespace_export(pkg=np, func=i):
raise TypeError(
"Each item in `summ_ops` must be a numpy function."
f" Found {type(i)} : {i.__name__}"
)
else:
raise TypeError(
(
"Each item in `summ_ops` must be a function."
f" Found {type(i)} : {i}"
)
)
elif inspect.isfunction(summ_ops):
if not _check_namespace_export(pkg=np, func=summ_ops):
raise NotImplementedError(
"`summ_ops` expects numpy functions only."
)
else:
raise TypeError(
"`summ_ops` expects a numpy function or list of numpy"
f" functions. Found {type(summ_ops)}"
)

def summarise_trips(
self,
summ_ops: list = [np.min, np.max, np.mean, np.median],
summ_ops: list = ["min", "max", "mean", "median"],
return_summary: bool = True,
) -> pd.DataFrame:
"""Produce a summarised table of trip statistics by day of week.
Expand All @@ -457,8 +427,8 @@ def summarise_trips(
Parameters
----------
summ_ops : list, optional
A list of operators used to get a summary of a given day,
by default [np.min, np.max, np.mean, np.median]
A list of operators in string format used to get a summary of a
given day, by default ["min", "max", "mean", "median"]
return_summary : bool, optional
When True, a summary is returned. When False, trip data
for each date is returned,
Expand Down Expand Up @@ -496,7 +466,6 @@ def summarise_trips(
day_trip_counts.reset_index(inplace=True)
day_trip_counts = day_trip_counts.round(0)

# order the days (for plotting future purposes)
# order the days (for plotting future purposes)
day_trip_counts = self._order_dataframe_by_day(df=day_trip_counts)
day_trip_counts.reset_index(drop=True, inplace=True)
Expand All @@ -505,7 +474,7 @@ def summarise_trips(

def summarise_routes(
self,
summ_ops: list = [np.min, np.max, np.mean, np.median],
summ_ops: list = ["min", "max", "mean", "median"],
return_summary: bool = True,
) -> pd.DataFrame:
"""Produce a summarised table of route statistics by day of week.
Expand All @@ -519,8 +488,8 @@ def summarise_routes(
Parameters
----------
summ_ops : list, optional
A list of operators used to get a summary of a given day,
by default [np.min, np.max, np.mean, np.median]
A list of operators in string format used to get a summary of a
given day, by default ["min", "max", "mean", "median"]
return_summary : bool, optional
When True, a summary is returned. When False, route data
for each date is returned,
Expand Down
4 changes: 2 additions & 2 deletions src/transport_performance/utils/defence.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ def _check_list(ls, param_nm, check_elements=True, exp_type=str):
if not isinstance(i, exp_type):
raise TypeError(
(
f"`{param_nm}` must contain {str(exp_type)} only."
f" Found {type(i)} : {i}"
f"`{param_nm}` must contain {exp_type} only."
f" Found {type(i)}"
)
)

Expand Down
69 changes: 27 additions & 42 deletions tests/gtfs/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,36 +341,29 @@ def test__preprocess_trips_and_routes(self, gtfs_fixture):

def test_summarise_trips_defence(self, gtfs_fixture):
"""Defensive checks for summarise_trips()."""
"""Defensive checks for summarise_trips()."""
# cases where a function is passed to summ_ops
with pytest.raises(
TypeError,
match="Each item in `summ_ops`.*. Found <class 'str'> : np.mean",
match="`summ_ops` must contain <class 'str'> only. Found <class "
"'numpy._ArrayFunctionDispatcher'>",
):
gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"])
# case where the item is a function but is not exported from numpy

def dummy_func():
"""Test case func."""
return None
gtfs_fixture.summarise_trips(summ_ops=[np.mean])

# cases where a list isn't passed to summ_ops
with pytest.raises(
TypeError,
match=(
"Each item in `summ_ops` must be a numpy function. Found"
" <class 'function'> : dummy_func"
),
):
gtfs_fixture.summarise_trips(summ_ops=[np.min, dummy_func])
# case where a single non-numpy func is being passed
with pytest.raises(
NotImplementedError,
match="`summ_ops` expects numpy functions only.",
match="`summ_ops` should be a list. Instead found <class 'str'>",
):
gtfs_fixture.summarise_trips(summ_ops=dummy_func)
gtfs_fixture.summarise_trips(summ_ops="tester")

# cases where an item passed to summ_ops is an invalid operator
with pytest.raises(
TypeError,
match="`summ_ops` expects a numpy function.*. Found <class 'int'>",
AttributeError,
match="'SeriesGroupBy' object has no attribute 'tester'",
):
gtfs_fixture.summarise_trips(summ_ops=38)
gtfs_fixture.summarise_trips(summ_ops=["tester"])

# cases where return_summary is not of type boolean
with pytest.raises(
TypeError,
Expand All @@ -387,36 +380,28 @@ def dummy_func():

def test_summarise_routes_defence(self, gtfs_fixture):
"""Defensive checks for summarise_routes()."""
# cases where a function is passed to summ_ops
with pytest.raises(
TypeError,
match="Each item in `summ_ops`.*. Found <class 'str'> : np.mean",
match="`summ_ops` must contain <class 'str'> only. Found <class "
"'numpy._ArrayFunctionDispatcher'>",
):
gtfs_fixture.summarise_trips(summ_ops=[np.mean, "np.mean"])
# case where the item is a function but is not exported from numpy

def dummy_func():
"""Test case func."""
return None
gtfs_fixture.summarise_routes(summ_ops=[np.mean])

# cases where a list isn't passed to summ_ops
with pytest.raises(
TypeError,
match=(
"Each item in `summ_ops` must be a numpy function. Found"
" <class 'function'> : dummy_func"
),
):
gtfs_fixture.summarise_routes(summ_ops=[np.min, dummy_func])
# case where a single non-numpy func is being passed
with pytest.raises(
NotImplementedError,
match="`summ_ops` expects numpy functions only.",
match="`summ_ops` should be a list. Instead found <class 'str'>",
):
gtfs_fixture.summarise_routes(summ_ops=dummy_func)
gtfs_fixture.summarise_routes(summ_ops="tester")

# cases where an item passed to summ_ops is an invalid operator
with pytest.raises(
TypeError,
match="`summ_ops` expects a numpy function.*. Found <class 'int'>",
AttributeError,
match="'SeriesGroupBy' object has no attribute 'tester'",
):
gtfs_fixture.summarise_routes(summ_ops=38)
gtfs_fixture.summarise_routes(summ_ops=["tester"])

# cases where return_summary is not of type boolean
with pytest.raises(
TypeError,
Expand Down
2 changes: 1 addition & 1 deletion tests/utils/test_defence.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test__check_list_elements(self):
TypeError,
match=(
"`mixed_list` must contain <class 'int'> only. Found "
"<class 'str'> : 2"
"<class 'str'>"
),
):
_check_list(
Expand Down