Route choice docs and adjusting (#532)

* fixes example (#530) * documentation * docs * Changing API * Docs * Docs * Example for choice set generation * Example for choice set generation * Image thumbnail for notebook * Map for example * Map for example * . * Invert probabilities, cut-off now includes only above it, not below * Clarifies notebook * Clarifies notebook * updates CI * Fix tests for probability cutoff * Support disconnect OD pairs * Fix select link not using filtered graph * Use more copies to avoid link loading issues (hopefully) * Simplifies return of link loading * Makes scheduling of parallel jobs more aggressive (each individual job is very quick, so the overhead is negligible and potential for load balance is huge) * randomizes inputs for load balancing * removes reference to theta as a utility function parameter * removes reference to theta as a utility function parameter * Add missing negation and remove theta parameter from tests * ci test * ci test * Revert "ci test" This reverts commit a34a497. * Revert "ci test" This reverts commit 4eb8cd1. * CI * CI * CI * CI * Documentation icons * Include comments as docs * Add some detail to the modelling with aeq route choice docs * response to comments * . * . * string format * . * . * parameter clarification * move comment one line up for clarity --------- Co-authored-by: Renata Imai <[email protected]> Co-authored-by: pveigadecamargo <[email protected]> Co-authored-by: Jake-Moss <[email protected]> Co-authored-by: Renata Imai <[email protected]> Co-authored-by: Jan Zill <[email protected]>
AequilibraE · Jun 16, 2024 · c02a22d · c02a22d
1 parent 4697bdb
commit c02a22d
Show file tree

Hide file tree

Showing 38 changed files with 635 additions and 111 deletions.
diff --git a/.github/build_artifacts_qgis.yml b/.github/build_artifacts_qgis.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set Python environment
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
         architecture: ${{ matrix.architecture }}

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
@@ -17,7 +17,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python 3.10
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.10'
     - name: Install dependencies

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -8,7 +8,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set Python environment
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.9
           architecture: x64
@@ -38,7 +38,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Set Python environment
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies

diff --git a/aequilibrae/paths/basic_path_finding.pyx b/aequilibrae/paths/basic_path_finding.pyx
@@ -348,7 +348,9 @@ cpdef int path_finding(long origin,
             vert_state = pqueue.Elements[head_vert_idx].state
             if vert_state != SCANNED:
                 head_vert_val = tail_vert_val + graph_costs[idx]
-                if vert_state == NOT_IN_HEAP:
+                if head_vert_val == INFINITY:
+                    continue
+                elif vert_state == NOT_IN_HEAP:
                     insert(&pqueue, head_vert_idx, head_vert_val)
                     pred[head_vert_idx] = tail_vert_idx
                     connectors[head_vert_idx] = ids[idx]

diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py
@@ -22,9 +22,9 @@ class RouteChoice:
     all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"]
 
     default_paramaters = {
-        "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01, "cutoff_prob": 1.0},
+        "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01, "cutoff_prob": 0.0},
         "link-penalisation": {},
-        "bfsle": {"beta": 1.0, "theta": 1.0, "penalty": 1.0},
+        "bfsle": {"penalty": 1.0},
     }
 
     def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None):
@@ -75,7 +75,7 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None:
 
         Setting the parameters for the route choice:
 
-        `beta`, `theta`, and `seed` are BFSLE specific parameters.
+        `seed` is a BFSLE-specific parameter.
 
         Setting `max_depth` or `max_misses`, while not required, is strongly recommended to prevent runaway algorithms.
         `max_misses` is the maximum amount of duplicate routes found per OD pair. If it is exceeded then the route set
@@ -104,8 +104,9 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None:
         excluded from the PSL calculations. The route is still returned, but with a probability of 0.0.
 
         The `cutoff_prob` should be in the range [0, 1]. It is then rescaled internally to [0.5, 1] as probabilities
-        below 0.5 produce negative differences in utilities. A higher `cutoff_prob` includes more routes. A value of
-        `0.0` will only include the minimum cost route. A value of `1.0` includes all routes.
+        below 0.5 produce negative differences in utilities because the choice is between two routes only, one of
+        which is the shortest path. A higher `cutoff_prob` includes fewer routes. A value of `1.0` will only include
+        the minimum cost route. A value of `0.0` includes all routes.
 
         :Arguments:
             **algorithm** (:obj:`str`): Algorithm to be used
@@ -222,7 +223,7 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool
             **self.parameters,
         )
 
-    def execute(self, perform_assignment: bool = False) -> None:
+    def execute(self, perform_assignment: bool = True) -> None:
         """
         Generate route choice sets between the previously supplied nodes, potentially performing an assignment.
 
@@ -310,10 +311,16 @@ def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]:
 
         return self.results
 
-    def get_load_results(self) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]:
+    def get_load_results(
+        self, compressed_graph_results=False
+    ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]:
         """
         Translates the link loading results from the graph format into the network format.
 
+        :Arguments:
+            **compressed_graph_results** (:obj:`bool`): Whether we should return assignment results for the
+            compressed graph. Only use this option if you are SURE you know what you are doing. Default `False`.
+
         :Returns:
             **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`):
                 The uncompressed link loading results as a DataFrame, or the compressed graph link loading
                 results when `compressed_graph_results` is `True`.
@@ -339,20 +346,21 @@ def get_load_results(self) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFr
         )
         compact_lids = np.unique(self.graph.compact_graph.link_id.values)
         compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.compact_link_loads)
-
-        return uncompressed_df, compressed_df
+        if compressed_graph_results:
+            return compressed_df
+        return uncompressed_df
 
     def __link_loads_to_df(self, mapping, lids, link_loads):
         df = pd.DataFrame(
             {"link_id": lids} | {k + dir: np.zeros(lids.shape) for k in link_loads.keys() for dir in ["_ab", "_ba"]}
         )
         for k, v in link_loads.items():
             # Directional Flows
-            df[k + "_ab"].values[mapping.network_ab_idx] = np.nan_to_num(v[mapping.graph_ab_idx])
-            df[k + "_ba"].values[mapping.network_ba_idx] = np.nan_to_num(v[mapping.graph_ba_idx])
+            df.iloc[mapping.network_ab_idx, df.columns.get_loc(k + "_ab")] = np.nan_to_num(v[mapping.graph_ab_idx])
+            df.iloc[mapping.network_ba_idx, df.columns.get_loc(k + "_ba")] = np.nan_to_num(v[mapping.graph_ba_idx])
 
             # Tot Flow
-            df[k + "_tot"] = np.nan_to_num(df[k + "_ab"].values) + np.nan_to_num(df[k + "_ba"].values)
+            df[k + "_tot"] = df[k + "_ab"] + df[k + "_ba"]
 
         return df
 
@@ -485,26 +493,17 @@ def save_link_flows(self, table_name: str, project=None) -> None:
         if not project:
             project = self.project or get_active_project()
 
-        u, c = self.get_load_results()
+        df = self.get_load_results()
         info = self.info()
         self.__save_dataframe(
-            u,
+            df,
             "Link loading",
             "Uncompressed link loading results",
             table_name + "_uncompressed",
             info,
             project=project,
         )
 
-        self.__save_dataframe(
-            c,
-            "Link loading",
-            "Compressed link loading results",
-            table_name + "_compressed",
-            info,
-            project=project,
-        )
-
     def save_select_link_flows(self, table_name: str, project=None) -> None:
         """
         Saves the select link link flows for all classes into the results database. Additionally, it exports

diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd
@@ -217,7 +217,7 @@ cdef class RouteChoiceSet:
     cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil
 
     @staticmethod
-    cdef vector[bool] *compute_mask(RouteSet_t *route_sets, double cutoff_prob, vector[double] &total_cost) noexcept nogil
+    cdef vector[bool] *compute_mask(double cutoff_prob, vector[double] &total_cost) noexcept nogil
 
     @staticmethod
     cdef vector[double] *compute_path_overlap(
@@ -233,8 +233,7 @@ cdef class RouteChoiceSet:
         vector[double] &total_cost,
         vector[double] &path_overlap_vec,
         vector[bool] &route_mask,
-        double beta,
-        double theta
+        double beta
     ) noexcept nogil
 
     @staticmethod

diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx
@@ -21,6 +21,7 @@ from openmp cimport omp_get_max_threads
 
 from libc.stdio cimport fprintf, stderr
 
+import random
 import itertools
 import logging
 import pathlib
@@ -141,6 +142,8 @@ cdef class RouteChoiceSet:
         self.a_star = False
 
         self.ids_graph_view = graph.compact_graph.id.values
+
+        # We explicitly don't want the links that have been removed from the graph
         self.graph_compressed_id_view = graph.graph.__compressed_id__.values
         self.num_nodes = graph.compact_num_nodes
         self.num_links = graph.compact_num_links
@@ -254,8 +257,7 @@ cdef class RouteChoiceSet:
             where: Optional[str] = None,
             path_size_logit: bool = False,
             beta: float = 1.0,
-            theta: float = 1.0,
-            cutoff_prob: float = 1.0,
+            cutoff_prob: float = 0.0,
     ):
         """Compute a route set for a list of OD pairs.
 
@@ -288,8 +290,8 @@ cdef class RouteChoiceSet:
         if max_routes < 0 or max_depth < 0:
             raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative")
 
-        if path_size_logit and (beta < 0 or theta <= 0):
-            raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model")
+        if path_size_logit and beta < 0:
+            raise ValueError("`beta` must be >= 0 for path sized logit model")
 
         if path_size_logit and not 0.0 <= cutoff_prob <= 1.0:
             raise ValueError("`cutoff_prob` must be 0 <= `cutoff_prob` <= 1 for path sized logit model")
@@ -309,7 +311,7 @@ cdef class RouteChoiceSet:
             unsigned int c_cores = cores if cores > 0 else omp_get_max_threads()
 
             # Scale and invert cutoff prob from [0, 1] -> [1, 0.5]. Values below 0.5 produce negative inverse binary logit values.
-            double scaled_cutoff_prob = cutoff_prob * 0.5 + 0.5
+            double scaled_cutoff_prob = (1.0 - cutoff_prob) * 0.5 + 0.5
 
             vector[pair[long long, long long]] c_ods
 
@@ -339,7 +341,8 @@ cdef class RouteChoiceSet:
         else:
             _reached_first_matrix = np.zeros((c_cores, self.num_nodes + 1), dtype=np.int64)
 
-        set_ods = set(ods)
+        # Shuffling the jobs improves load balancing where node pairs are geographically ordered
+        set_ods = list(set(ods))
         if len(set_ods) != len(ods):
             warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs")
 
@@ -348,10 +351,11 @@ cdef class RouteChoiceSet:
                 where,
                 self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"]
             )
-            batches = list(Checkpoint.batches(list(set_ods)))
+            batches = list(Checkpoint.batches(set_ods))
             max_results_len = <size_t>max(len(batch) for batch in batches)
         else:
-            batches = [list(set_ods)]
+            random.shuffle(set_ods)
+            batches = [set_ods]
             max_results_len = len(set_ods)
 
         results = new vector[RouteSet_t *](max_results_len)
@@ -398,7 +402,7 @@ cdef class RouteChoiceSet:
                 prob_set.resize(batch_len)
 
             with nogil, parallel(num_threads=c_cores):
-                for i in prange(batch_len):
+                for i in prange(batch_len, schedule= "dynamic", chunksize=1):
                     origin_index = self.nodes_to_indices_view[c_ods[i].first]
                     dest_index = self.nodes_to_indices_view[c_ods[i].second]
 
@@ -447,7 +451,7 @@ cdef class RouteChoiceSet:
 
                     if path_size_logit:
                         d(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view)
-                        d(mask_set)[i] = RouteChoiceSet.compute_mask(route_set, scaled_cutoff_prob, d(d(cost_set)[i]))
+                        d(mask_set)[i] = RouteChoiceSet.compute_mask(scaled_cutoff_prob, d(d(cost_set)[i]))
 
                         freq_pair = RouteChoiceSet.compute_frequency(route_set, d(d(mask_set)[i]))
                         d(link_union_set)[i] = freq_pair.first
@@ -462,8 +466,7 @@ cdef class RouteChoiceSet:
                             d(d(cost_set)[i]),
                             d(d(path_overlap_set)[i]),
                             d(d(mask_set)[i]),
-                            beta,
-                            theta
+                            beta
                         )
                         # While we need the unique sorted links (.first), we don't need the frequencies (.second)
                         del freq_pair.second
@@ -895,7 +898,7 @@ cdef class RouteChoiceSet:
     @cython.boundscheck(False)
     @cython.initializedcheck(False)
     @staticmethod
-    cdef vector[bool] *compute_mask(RouteSet_t *route_set, double cutoff_prob, vector[double] &total_cost) noexcept nogil:
+    cdef vector[bool] *compute_mask(double cutoff_prob, vector[double] &total_cost) noexcept nogil:
         """
         Computes a binary logit between the minimum cost path and each path, if the total cost is greater than the
         minimum + the difference in utilities required to produce the cut-off probability then the route is excluded from
@@ -914,7 +917,9 @@ cdef class RouteChoiceSet:
             d(route_mask)[i] = (total_cost[i] <= cutoff_cost)
 
         # Always include the min element. It should already be but I don't trust floating math to do this correctly.
-        d(route_mask)[min - total_cost.cbegin()] = True
+        # But only if there actually was a min element (i.e. the route set is not empty)
+        if min != total_cost.cend():
+            d(route_mask)[min - total_cost.cbegin()] = True
 
         return route_mask
 
@@ -982,8 +987,7 @@ cdef class RouteChoiceSet:
         vector[double] &total_cost,
         vector[double] &path_overlap_vec,
         vector[bool] &route_mask,
-        double beta,
-        double theta
+        double beta
     ) noexcept nogil:
         """Compute a probability for each route in the route set based on the path overlap."""
         cdef:
@@ -1009,7 +1013,7 @@ cdef class RouteChoiceSet:
                 if path_overlap_vec[i] == 0.0:
                     fprintf(stderr, "path_overlap_vec[%ld] == 0.0\n", i)
                 inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \
-                    * exp(-theta * (total_cost[j] - total_cost[i]))
+                    * exp((total_cost[i] - total_cost[j]))  # Assuming theta=1.0
 
             if inv_prob == 0.0:
                 fprintf(stderr, "inv_prob == 0.0\n")
@@ -1070,19 +1074,17 @@ cdef class RouteChoiceSet:
 
     cdef apply_link_loading_func(RouteChoiceSet self, vector[double] *ll, int cores):
         """Helper function for link_loading."""
-        # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without
-        # transferring ownership.
-        compressed = <double[:ll.size(), :1]>&d(ll)[0]
-
+        compressed = np.hstack([d(ll), [0.0]]).reshape(ll.size() + 1, 1)
         actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64)
+
         assign_link_loads_cython(
             actual,
             compressed,
             self.graph_compressed_id_view,
             cores
         )
-        compressed = np.array(compressed, copy=True)
-        return actual.reshape(-1), compressed.reshape(-1)
+
+        return actual.reshape(-1), compressed[:-1].reshape(-1)
 
     @cython.boundscheck(False)
     @cython.wraparound(False)

diff --git a/aequilibrae/reference_files/coquimbo.zip b/aequilibrae/reference_files/coquimbo.zip
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -77,7 +77,7 @@
             "examples/trip_distribution",
             "examples/visualization",
             "examples/aequilibrae_without_a_model",
-            "examples/full_workflows",
+            "examples/assignment_workflows",
             "examples/other_applications",
         ]
     ),

diff --git a/docs/source/examples/aequilibrae_without_a_model/plot_assignment_without_model.py b/docs/source/examples/aequilibrae_without_a_model/plot_assignment_without_model.py
@@ -21,6 +21,7 @@
 from aequilibrae.paths import Graph
 from aequilibrae.paths import TrafficAssignment
 from aequilibrae.paths.traffic_class import TrafficClass
+# sphinx_gallery_thumbnail_path = 'images/assignment_plot.png'
 
 # %%
 # We load the example file from the GMNS GitHub repository

diff --git a/...amples/full_workflows/plot_forecasting.py → .../assignment_workflows/plot_forecasting.py b/...amples/full_workflows/plot_forecasting.py → .../assignment_workflows/plot_forecasting.py
diff --git a/...rkflows/plot_public_transit_assignment.py → ...rkflows/plot_public_transit_assignment.py b/...rkflows/plot_public_transit_assignment.py → ...rkflows/plot_public_transit_assignment.py
@@ -26,6 +26,7 @@
 # Imports for SF transit graph construction
 from aequilibrae.project.database_connection import database_connection
 from aequilibrae.transit.transit_graph_builder import TransitGraphBuilder
+# sphinx_gallery_thumbnail_path = 'images/hyperpath_bell_n_10_alpha_100d0.png'
 
 # %%
 # Let's create an empty project on an arbitrary folder.