test: added transport_performance() unit test

datasciencecampus · Nov 15, 2023 · 9f2a6d0 · 9f2a6d0
1 parent 23904e4
commit 9f2a6d0
Show file tree

Hide file tree

Showing 8 changed files with 224 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,13 +6,20 @@
 *.zip
 *.pkl
 *.html
+*.tif
+*.parquet
 
 # exception for test fixtures/package data
 !tests/data/newport-2023-06-13.osm.pbf
 !tests/data/chester-20230816-small_gtfs.zip
 !tests/data/gtfs/newport-20230613_gtfs.zip
 !src/transport_performance/data/gtfs/route_lookup.pkl
 !tests/data/gtfs/report/html_template.html
+!tests/data/metrics/mock_centroid_gdf.pkl
+!tests/data/metrics/mock_pop_gdf.pkl
+!tests/data/metrics/mock_raster_input.tif
+!tests/data/metrics/mock_tt.parquet
+!tests/data/metrics/mock_urban_centre.pkl
 
 # exception for html templates
 !src/transport_performance/data/gtfs/report/html_templates/evaluation_template.html
@@ -48,7 +55,6 @@ outputs/*
 *.csv
 *.rds
 *.rda
-*.parquet
 *.arrow
 
 # Office files
@@ -64,7 +70,6 @@ outputs/*
 *.png
 *.jpg
 *.jpeg
-*.tif
 *.svg
 
 # The following was created by https://www.toptal.com/developers/gitignore/api/macos,windows,r,python

diff --git a/scripts/prep-metrics-fixtures.py b/scripts/prep-metrics-fixtures.py
@@ -0,0 +1,88 @@
+"""Script to prepare dummy metrics unit test fixtures.
+
+Uses `tests/data/metrics/mock_raster_input.tif` as an input. This input
+corresponds to a mock 4x4 100m gridded cell raster over the ONS Newport
+Office. A dummy urban centre boundary is also constructed, that represents
+the centre 4 cells of the 4x4 grid. This mock urban centre boundary is then
+saved as a test fixture. All these mock inputs are then fed into `RasterPop`
+to generate corresponding `pop_gdf` and `centroids_gdf` fixtures. Finally, an
+OD travel time matrix is mocked and saved as a parquet test fixture.
+
+Travel times are constructed such that, when calculating the transport
+performance with travel_time_threshold=3 and distance_threshold=0.11,
+for each destination cell in the urban centre:
+
+- accessible population = current cell + left and right hand cells.
+- proximity population = current cell + left, right, below and above cells.
+
+This is done to ensure effective testing of the threshold.
+
+Note: changes made here will need to be reflected in the corresponding unit
+tests that use them.
+
+"""
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+
+from pyprojroot import here
+from shapely.geometry import Polygon
+
+from transport_performance.population.rasterpop import RasterPop
+from transport_performance.utils.io import to_pickle
+
+# set path constants
+INPUT_FIXTURE_PATH = here("tests/data/metrics/mock_raster_input.tif")
+UC_FIXTURE_PATH = here("tests/data/metrics/mock_urban_centre.pkl")
+POP_GDF_FIXTURE_PATH = here("tests/data/metrics/mock_pop_gdf.pkl")
+CENTROID_GDF_FIXTURE_PATH = here("tests/data/metrics/mock_centroid_gdf.pkl")
+TT_FIXTURE_PATH = here("tests/data/metrics/mock_tt.parquet")
+
+# construct mock urban centre boundary and write fixture to file
+coords = (
+    (-225700, 6036700),
+    (-225700, 6036500),
+    (-225500, 6036500),
+    (-225500, 6036700),
+    (-225700, 6036700),
+)
+uc_fixture = gpd.GeoDataFrame(
+    ["vectorized_uc"],
+    geometry=[Polygon(coords)],
+    columns=["label"],
+    crs="ESRI:54009",
+)
+to_pickle(uc_fixture, UC_FIXTURE_PATH)
+
+# construct pop_gdf and centroid_fixture
+rp = RasterPop(INPUT_FIXTURE_PATH)
+pop_fixture, centroid_fixture = rp.get_pop(
+    uc_fixture.loc[0, "geometry"].buffer(100, join_style=2),
+    urban_centre_bounds=uc_fixture.loc[0, "geometry"],
+)
+
+# generate population data with a fixed random seed for reproducibility
+np.random.seed(42)
+pop_fixture["population"] = np.random.randint(
+    1, len(pop_fixture) + 1, len(pop_fixture)
+)
+
+# save pop_gdf and centroid_gdf fixtures
+to_pickle(pop_fixture, POP_GDF_FIXTURE_PATH)
+to_pickle(centroid_fixture, CENTROID_GDF_FIXTURE_PATH)
+
+# construct mock travel time data using ID differences as travel times
+uc_ids = pop_fixture[pop_fixture.within_urban_centre].id.unique()
+from_ids = np.arange(0, len(pop_fixture))
+
+travel_times = []
+for uc_id in uc_ids:
+    for from_id in from_ids:
+        travel_times.append([from_id, uc_id, abs(from_id - uc_id)])
+
+# save tt fixture as a parquet file (required format)
+tt_fixture = pd.DataFrame(
+    travel_times, columns=["from_id", "to_id", "travel_time"]
+)
+tt_fixture.to_parquet(TT_FIXTURE_PATH)
diff --git a/tests/data/metrics/mock_centroid_gdf.pkl b/tests/data/metrics/mock_centroid_gdf.pkl
diff --git a/tests/data/metrics/mock_pop_gdf.pkl b/tests/data/metrics/mock_pop_gdf.pkl
diff --git a/tests/data/metrics/mock_raster_input.tif b/tests/data/metrics/mock_raster_input.tif
diff --git a/tests/data/metrics/mock_tt.parquet b/tests/data/metrics/mock_tt.parquet
diff --git a/tests/data/metrics/mock_urban_centre.pkl b/tests/data/metrics/mock_urban_centre.pkl
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -0,0 +1,129 @@
+"""Tests for transport_performance/metrics.py."""
+
+import geopandas as gpd
+import pandas as pd
+import pathlib
+import pytest
+
+from pandas.testing import assert_frame_equal
+from pyprojroot import here
+
+from transport_performance.metrics import transport_performance
+from transport_performance.utils.io import from_pickle
+
+
+@pytest.fixture(scope="class")
+def uc_fixture() -> gpd.GeoDataFrame:
+    """Load the mock urban centre test fixture from its pickle file."""
+    pickle_path = here("tests/data/metrics/mock_urban_centre.pkl")
+    return from_pickle(pickle_path)
+
+
+@pytest.fixture(scope="class")
+def centroid_gdf_fixture() -> gpd.GeoDataFrame:
+    """Load the mock centroid_gdf test fixture from its pickle file."""
+    # relative path is resolved against the project root by `here`
+    pickle_path = here("tests/data/metrics/mock_centroid_gdf.pkl")
+    centroid_gdf = from_pickle(pickle_path)
+    return centroid_gdf
+
+
+@pytest.fixture(scope="class")
+def pop_gdf_fixture() -> gpd.GeoDataFrame:
+    """Load the mock pop_gdf test fixture from its pickle file."""
+    pickle_path = here("tests/data/metrics/mock_pop_gdf.pkl")
+    return from_pickle(pickle_path)
+
+
+@pytest.fixture(scope="class")
+def tt_fixture() -> pathlib.Path:
+    """Return the path to the mock travel times parquet test fixture."""
+    return here("tests/data/metrics/mock_tt.parquet")
+
+
+class TestTransportPerformance:
+    """Collection of tests for `transport_performance()` function."""
+
+    def test_transport_performance(
+        self, uc_fixture, centroid_gdf_fixture, pop_gdf_fixture, tt_fixture
+    ) -> None:
+        """Check `transport_performance()` against hand-calculated results.
+
+        The function is run on the mock fixtures with
+        `travel_time_threshold=3` and `distance_threshold=0.11`. Its
+        per-cell results and summary statistics are then compared with
+        the expected data frames built in this test.
+
+        Parameters
+        ----------
+        uc_fixture
+            A mock urban centre test fixture
+        centroid_gdf_fixture
+            A mock centroid test fixture
+        pop_gdf_fixture
+            A mock population test fixture
+        tt_fixture
+            Path to the mock travel time parquet fixture
+
+        Notes
+        -----
+        1. The input fixtures are described in more detail in
+        `scripts/prep-metrics-fixtures.py`.
+        2. Expected results were manually calculated and QA-ed.
+
+        """
+        # run the function under test on the mock inputs
+        tp_df, stats_df = transport_performance(
+            tt_fixture,
+            centroid_gdf_fixture,
+            pop_gdf_fixture,
+            travel_time_threshold=3,
+            distance_threshold=0.11,
+            urban_centre_name="name",
+            urban_centre_country="country",
+            urban_centre_gdf=uc_fixture,
+        )
+
+        # only this subset of the transport performance columns is checked
+        checked_cols = [
+            "id",
+            "accessible_population",
+            "proximity_population",
+            "transport_performance",
+        ]
+
+        # (id, accessible population, proximity population) for each urban
+        # centre cell; expected performance is accessible / proximity as a %
+        cell_pops = [
+            (5, 32, 46),
+            (6, 26, 42),
+            (9, 20, 39),
+            (10, 20, 41),
+        ]
+        expected_tp_df = pd.DataFrame(
+            [
+                [cell_id, acc_pop, prox_pop, (acc_pop / prox_pop * 100)]
+                for cell_id, acc_pop, prox_pop in cell_pops
+            ],
+            columns=checked_cols,
+        )
+
+        # expected summary statistics of the transport performance column
+        expected_perf = expected_tp_df.transport_performance
+        expected_stats_df = pd.DataFrame(
+            {
+                "urban centre name": ["name"],
+                "urban centre country": ["country"],
+                "urban centre area": [0.04],
+                "urban centre population": [34],
+                "min": [expected_perf.min()],
+                "25%": [expected_perf.quantile(0.25)],
+                "50%": [expected_perf.median()],
+                "75%": [expected_perf.quantile(0.75)],
+                "max": [expected_perf.max()],
+            }
+        )
+
+        # assert results are as expected
+        assert_frame_equal(tp_df[checked_cols], expected_tp_df)
+        assert_frame_equal(stats_df, expected_stats_df)