Skip to content

Commit

Permalink
test: added transport_performance() unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
ethan-moss committed Nov 15, 2023
1 parent 23904e4 commit 9f2a6d0
Show file tree
Hide file tree
Showing 8 changed files with 224 additions and 2 deletions.
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@
*.zip
*.pkl
*.html
*.tif
*.parquet

# exception for test fixtures/package data
!tests/data/newport-2023-06-13.osm.pbf
!tests/data/chester-20230816-small_gtfs.zip
!tests/data/gtfs/newport-20230613_gtfs.zip
!src/transport_performance/data/gtfs/route_lookup.pkl
!tests/data/gtfs/report/html_template.html
!tests/data/metrics/mock_centroid_gdf.pkl
!tests/data/metrics/mock_pop_gdf.pkl
!tests/data/metrics/mock_raster_input.tif
!tests/data/metrics/mock_tt.parquet
!tests/data/metrics/mock_urban_centre.pkl

# exception for html templates
!src/transport_performance/data/gtfs/report/html_templates/evaluation_template.html
Expand Down Expand Up @@ -48,7 +55,6 @@ outputs/*
*.csv
*.rds
*.rda
*.parquet
*.arrow

# Office files
Expand All @@ -64,7 +70,6 @@ outputs/*
*.png
*.jpg
*.jpeg
*.tif
*.svg

# The following was created by https://www.toptal.com/developers/gitignore/api/macos,windows,r,python
Expand Down
88 changes: 88 additions & 0 deletions scripts/prep-metrics-fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Script to prepare dummy metrics unit test fixtures.
Uses `tests/data/metrics/mock_raster_input.tif` as an input. This input
corresponds to a mock 4x4 100m gridded cell raster over the ONS Newport
Office. A dummy urban centre boundary is also constructed, that represents the
centre 4 cells of the 4x4 grid. This mock urban centre boundary is then saved
as a test fixture. All these mock inputs are then fed into `RasterPop` to
generate corresponding `pop_gdf` and `centroids_gdf` fixtures. Finally, an OD
travel time matrix is mocked and saved as a parquet test fixture.
Travel times are constructed such that, when calculating the transport
performance with travel_time_threshold=3 and distance_threshold=0.11,
for each destination cell in the urban centre:
- accessible population = current cell + left and right hand cells.
- proximity population = current cell + left, right, below and above cells.
This is done to ensure effective testing of the threshold.
Note: changes made here will need to be reflected in the corresponding unit
tests that use them.
"""

import geopandas as gpd
import numpy as np
import pandas as pd

from pyprojroot import here
from shapely.geometry import Polygon

from transport_performance.population.rasterpop import RasterPop
from transport_performance.utils.io import to_pickle

# set path constants
INPUT_FIXTURE_PATH = here("tests/data/metrics/mock_raster_input.tif")
UC_FIXTURE_PATH = here("tests/data/metrics/mock_urban_centre.pkl")
POP_GDF_FIXTURE_PATH = here("tests/data/metrics/mock_pop_gdf.pkl")
CENTROID_GDF_FIXTURE_PATH = here("tests/data/metrics/mock_centroid_gdf.pkl")
TT_FIXTURE_PATH = here("tests/data/metrics/mock_tt.parquet")

# build the mock urban centre boundary as a closed square ring, wrap it in a
# single-row GeoDataFrame and pickle it as a test fixture
x_min, x_max = -225700, -225500
y_min, y_max = 6036500, 6036700
uc_ring = [
    (x_min, y_max),
    (x_min, y_min),
    (x_max, y_min),
    (x_max, y_max),
    (x_min, y_max),  # repeat the first vertex to close the ring
]
uc_fixture = gpd.GeoDataFrame(
    {"label": ["vectorized_uc"]},
    geometry=[Polygon(uc_ring)],
    crs="ESRI:54009",
)
to_pickle(uc_fixture, UC_FIXTURE_PATH)

# derive the pop_gdf and centroid_gdf fixtures by running the mock raster
# through RasterPop; the area of interest is the urban centre boundary
# buffered by 100 (CRS units) using join_style=2
uc_boundary = uc_fixture.loc[0, "geometry"]
buffered_aoi = uc_boundary.buffer(100, join_style=2)
raster_pop = RasterPop(INPUT_FIXTURE_PATH)
pop_fixture, centroid_fixture = raster_pop.get_pop(
    buffered_aoi, urban_centre_bounds=uc_boundary
)

# assign seeded pseudo-random population counts (one per row, each drawn from
# 1..number of rows inclusive) so the fixture is reproducible
np.random.seed(42)
n_cells = len(pop_fixture)
pop_fixture["population"] = np.random.randint(1, n_cells + 1, n_cells)

# write both GeoDataFrames to their pickle fixture paths
to_pickle(pop_fixture, POP_GDF_FIXTURE_PATH)
to_pickle(centroid_fixture, CENTROID_GDF_FIXTURE_PATH)

# construct mock travel time data using ID differences as travel times:
# destinations are the IDs flagged as within the urban centre, origins are
# 0..len(pop_fixture)-1
uc_ids = pop_fixture[pop_fixture.within_urban_centre].id.unique()
ids = np.arange(0, len(pop_fixture))

# one [from_id, to_id, travel_time] row per (destination, origin) pair, in the
# same order as a nested loop with destinations outermost; the origin is named
# `from_id` so the builtin `id` is not shadowed at module level
travel_times = [
    [from_id, uc_id, abs(from_id - uc_id)]
    for uc_id in uc_ids
    for from_id in ids
]

# save tt fixture as a parquet file (required format)
tt_fixture = pd.DataFrame(
    travel_times, columns=["from_id", "to_id", "travel_time"]
)
tt_fixture.to_parquet(TT_FIXTURE_PATH)
Binary file added tests/data/metrics/mock_centroid_gdf.pkl
Binary file not shown.
Binary file added tests/data/metrics/mock_pop_gdf.pkl
Binary file not shown.
Binary file added tests/data/metrics/mock_raster_input.tif
Binary file not shown.
Binary file added tests/data/metrics/mock_tt.parquet
Binary file not shown.
Binary file added tests/data/metrics/mock_urban_centre.pkl
Binary file not shown.
129 changes: 129 additions & 0 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""Tests for transport_performance/metrics.py."""

import geopandas as gpd
import pandas as pd
import pathlib
import pytest

from pandas.testing import assert_frame_equal
from pyprojroot import here

from transport_performance.metrics import transport_performance
from transport_performance.utils.io import from_pickle


@pytest.fixture(scope="class")
def uc_fixture() -> gpd.GeoDataFrame:
    """Load the pickled mock urban centre fixture from the test data."""
    return from_pickle(here("tests/data/metrics/mock_urban_centre.pkl"))


@pytest.fixture(scope="class")
def centroid_gdf_fixture() -> gpd.GeoDataFrame:
    """Load the pickled mock centroid_gdf fixture from the test data."""
    return from_pickle(here("tests/data/metrics/mock_centroid_gdf.pkl"))


@pytest.fixture(scope="class")
def pop_gdf_fixture() -> gpd.GeoDataFrame:
    """Load the pickled mock pop_gdf fixture from the test data."""
    return from_pickle(here("tests/data/metrics/mock_pop_gdf.pkl"))


@pytest.fixture(scope="class")
def tt_fixture() -> pathlib.Path:
    """Return the path of the mock travel times parquet fixture."""
    tt_path = here("tests/data/metrics/mock_tt.parquet")
    return tt_path


class TestTransportPerformance:
    """Unit tests covering the `transport_performance()` function."""

    def test_transport_performance(
        self, uc_fixture, centroid_gdf_fixture, pop_gdf_fixture, tt_fixture
    ) -> None:
        """Check `transport_performance()` outputs against known results.

        Parameters
        ----------
        uc_fixture
            A mock urban centre test fixture
        centroid_gdf_fixture
            A mock centroid test fixture
        pop_gdf_fixture
            A mock population test fixture
        tt_fixture
            A mock travel time test fixture

        Notes
        -----
        1. The input fixtures are described in
        `scripts/prep-metrics-fixtures.py`.
        2. The expected values below were calculated by hand and QA-ed for
        this unit test.

        """
        # only this subset of the transport performance output is compared
        compared_cols = [
            "id",
            "accessible_population",
            "proximity_population",
            "transport_performance",
        ]

        # (id, accessible population, proximity population) per expected row;
        # transport performance is the accessible share as a percentage
        expected_populations = [
            (5, 32, 46),
            (6, 26, 42),
            (9, 20, 39),
            (10, 20, 41),
        ]
        expected_tp_df = pd.DataFrame(
            [
                [cell_id, accessible, proximity, accessible / proximity * 100]
                for cell_id, accessible, proximity in expected_populations
            ],
            columns=compared_cols,
        )

        # expected single-row summary: descriptive statistics are taken from
        # the expected transport performance values above
        tp_scores = expected_tp_df["transport_performance"]
        expected_stats = {
            "urban centre name": "name",
            "urban centre country": "country",
            "urban centre area": 0.04,
            "urban centre population": 34,
            "min": tp_scores.min(),
            "25%": tp_scores.quantile(0.25),
            "50%": tp_scores.median(),
            "75%": tp_scores.quantile(0.75),
            "max": tp_scores.max(),
        }
        expected_stats_df = pd.DataFrame(
            [list(expected_stats.values())],
            columns=list(expected_stats),
        )

        # run the function under test with the mock fixtures
        tp_df, stats_df = transport_performance(
            tt_fixture,
            centroid_gdf_fixture,
            pop_gdf_fixture,
            travel_time_threshold=3,
            distance_threshold=0.11,
            urban_centre_name="name",
            urban_centre_country="country",
            urban_centre_gdf=uc_fixture,
        )

        # both outputs must match the expected frames
        assert_frame_equal(tp_df[compared_cols], expected_tp_df)
        assert_frame_equal(stats_df, expected_stats_df)

0 comments on commit 9f2a6d0

Please sign in to comment.