From 2efc8efc2dd3490e7331f232415126ad996a93af Mon Sep 17 00:00:00 2001 From: Jamie Cook Date: Mon, 5 Feb 2024 10:47:32 +1000 Subject: [PATCH 01/33] Fix minor typos (#505) * Update graph.py * . --- aequilibrae/paths/graph.py | 2 +- aequilibrae/paths/graph_building.pyx | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index bf307d87a..26e37d13c 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -24,7 +24,7 @@ class GraphBase(ABC): # noqa: B024 with degrees of two. This compresses long streams of links, such as along highways or curved roads, into single links. Dead end removal attempts to remove dead ends and fish spines from the network. It does this based on the observation - that in a graph with non-negative weights a dead end will over ever appear in the results of a short(est) path if the + that in a graph with non-negative weights a dead end will only ever appear in the results of a short(est) path if the origin or destination is present within that dead end. Dead end removal is applied before link contraction and does not create a strictly topological equivalent graph, diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index 1bb816b1e..4bd13c85e 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -32,8 +32,8 @@ cdef void _remove_dead_ends( # - nodes for which all out going links point to the same node and has the same number of back links. # This is a generalisation of dead-ends formed by a node with a single bidirectional link to another # - # Removal: At nodes of interest we begin a BFS search for links we can remove. It's uncommon to the BFS to extend far as the stopping - # criteria and conditions for expansion are very similar, often this just searches in along a line. + # Removal: At nodes of interest we begin a Breadth First Search (BFS) for links we can remove. It's uncommon for the BFS to extend far as the stopping + # criteria and conditions for expansion are very similar, often this just searches along a line. # For removal the node must have "no choices", that is, all outgoing edges point to the same node *and* all incoming edges can be account for as # from that same node. # The criteria for expansion is that there are no incoming edges *and* all outgoing edges point to the same node. @@ -68,16 +68,16 @@ cdef void _remove_dead_ends( continue ### Propagation - # We now know that he node we are looking at has a mix of incoming and outgoing edges, i.e. in_degree[node] > 0 and out_degree[node] > 0 + # We now know that the node we are looking at has a mix of incoming and outgoing edges, i.e. in_degree[node] > 0 and out_degree[node] > 0 # That implies that this node is reachable from some other node. We now need to assess if this node would ever be considered in pathfinding. - # To be considered there needs to be some form of real choice, i.e. there needs to be multiple ways to leave a new node. If the only way + # To be considered, there needs to be some form of real choice, i.e. there needs to be multiple ways to leave a new node. If the only way # to leave this node is back the way we came then the cost of the path # ... -> pre -> node -> pre -> ... # is greater than that of # ... -> pre -> ... # so we can remove this link to node. This is because negative cost cycles are disallowed in path finding. 
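The removal test described in the comments above is compact enough to state in plain Python. A minimal sketch, assuming simple in/out adjacency mappings rather than the module's CSR arrays (all names below are illustrative, not part of graph_building.pyx):

    def has_no_choices(node, out_adj, in_adj):
        # A node offers "no choices" when every outgoing edge points to a
        # single neighbour *and* every incoming edge comes from that same
        # neighbour. Such a node can never lie on a shortest path unless it
        # is itself the origin or destination.
        targets = set(out_adj[node])
        if len(targets) != 1:
            return False
        (neighbour,) = targets
        return all(source == neighbour for source in in_adj[node])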
# - # We don't remove the case were the is only one new node that could be used to leave as it is handled in the graph compression. + # We don't remove the case where there is only one new node that could be used to leave as it is handled in the graph compression. # It would however, be possible to handle that here as well. # If all the outgoing edges are to the same node *and* all incoming edges come from that node, then there is no real choice and the From a23270f57c2adac375ddbd404a68f7681567bb40 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 07:10:24 +1000 Subject: [PATCH 02/33] Processing with chunking --- .../distribution/gravity_application.py | 18 +++--- aequilibrae/distribution/ipf.py | 4 -- aequilibrae/matrix/aequilibrae_matrix.py | 63 +++++++------------ aequilibrae/paths/network_skimming.py | 3 - aequilibrae/paths/traffic_assignment.py | 4 -- 5 files changed, 31 insertions(+), 61 deletions(-) diff --git a/aequilibrae/distribution/gravity_application.py b/aequilibrae/distribution/gravity_application.py index 08bf67be1..4eefadb89 100644 --- a/aequilibrae/distribution/gravity_application.py +++ b/aequilibrae/distribution/gravity_application.py @@ -1,5 +1,4 @@ import glob -import importlib.util as iutil import logging import os import tempfile @@ -15,9 +14,6 @@ from aequilibrae.distribution.synthetic_gravity_model import SyntheticGravityModel from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - class GravityApplication: """Applies a synthetic gravity model. @@ -320,15 +316,15 @@ def __apply_function(self): elif self.model.function == "POWER": # self.output.matrices[self.core_name][i, :] = (np.power(self.impedance.matrix_view[i, :, 0], - self.model.alpha) * p * a)[:] self.output.matrix_view[i, :] = (np.power(self.impedance.matrix_view[i, :], -self.model.alpha) * p * a)[ - : - ] + : + ] elif self.model.function == "GAMMA": self.output.matrix_view[i, :] = ( - np.power(self.impedance.matrix_view[i, :], self.model.alpha) - * np.exp(-self.model.beta * self.impedance.matrix_view[i, :]) - * p - * a - )[:] + np.power(self.impedance.matrix_view[i, :], self.model.alpha) + * np.exp(-self.model.beta * self.impedance.matrix_view[i, :]) + * p + * a + )[:] # Deals with infinite and NaNs infinite = np.isinf(self.output.matrix_view[:, :]).astype(int) diff --git a/aequilibrae/distribution/ipf.py b/aequilibrae/distribution/ipf.py index 2378c40a4..5d3d45dcc 100644 --- a/aequilibrae/distribution/ipf.py +++ b/aequilibrae/distribution/ipf.py @@ -1,4 +1,3 @@ -import importlib.util as iutil import os from datetime import datetime from time import perf_counter @@ -12,9 +11,6 @@ from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData from aequilibrae.project.data.matrix_record import MatrixRecord -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - class Ipf: """Iterative proportional fitting procedure diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 13fc3c492..99d1431fa 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -1,5 +1,4 @@ import functools -import importlib.util as iutil import os import tempfile import uuid @@ -14,10 +13,6 @@ from scipy.sparse import coo_matrix # Checks if we can display OMX -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None -if has_omx: - import openmatrix as omx # CONSTANTS VERSION = 1 # VERSION OF THE MATRIX FORMAT @@ -54,9 +49,7 @@ 
# Offset: 18 + 50*cores + Y*20 | 18 + 50*cores + Y*20 + Y*zones*8 | -matrix_export_types = ["Aequilibrae matrix (*.aem)", "Comma-separated file (*.csv)"] -if has_omx: - matrix_export_types.append("Open matrix (*.omx)") +matrix_export_types = ["Open matrix (*.omx), Aequilibrae matrix (*.aem)", "Comma-separated file (*.csv)"] class AequilibraeMatrix(object): @@ -160,14 +153,14 @@ def __save_as(self, file_name: str, cores: List[str]): omx_mat.close() def create_empty( - self, - file_name: str = None, - zones: int = None, - matrix_names: List[str] = None, - data_type: np.dtype = np.float64, - index_names: List[str] = None, - compressed: bool = False, - memory_only: bool = True, + self, + file_name: str = None, + zones: int = None, + matrix_names: List[str] = None, + data_type: np.dtype = np.float64, + index_names: List[str] = None, + compressed: bool = False, + memory_only: bool = True, ): """ Creates an empty matrix in the AequilibraE format @@ -286,14 +279,14 @@ def get_matrix(self, core: str, copy=False) -> np.ndarray: return mat def create_from_omx( - self, - file_path: str, - omx_path: str, - cores: List[str] = None, - mappings: List[str] = None, - robust: bool = True, - compressed: bool = False, - memory_only: bool = True, + self, + file_path: str, + omx_path: str, + cores: List[str] = None, + mappings: List[str] = None, + robust: bool = True, + compressed: bool = False, + memory_only: bool = True, ) -> None: """ Creates an AequilibraeMatrix from an original OpenMatrix @@ -336,10 +329,6 @@ def robust_name(input_name: str, max_length: int, forbiden_names: List[str]) -> if trial_name not in forbiden_names: return trial_name - if not has_omx: - print("Open Matrix is not installed. Cannot continue") - return - if compressed: raise Warning("Matrix compression not yet supported") @@ -788,10 +777,6 @@ def export(self, output_name: str, cores: List[str] = None): """ fname, file_extension = os.path.splitext(output_name.upper()) - if file_extension == ".OMX": - if not has_omx: - raise ValueError("Open Matrix is not installed. 
Cannot continue") - if file_extension not in [".AEM", ".CSV", ".OMX"]: raise NotImplementedError(f"File extension {file_extension} not implemented yet") @@ -917,15 +902,15 @@ def computational_view(self, core_list: List[str] = None): self.matrix_view = self.matrices[:, :, idx1] elif len(core_list) > 1: idx2 = self.names.index(core_list[-1]) - self.matrix_view = self.matrices[:, :, idx1 : idx2 + 1] + self.matrix_view = self.matrices[:, :, idx1: idx2 + 1] def copy( - self, - output_name: str = None, - cores: List[str] = None, - names: List[str] = None, - compress: bool = None, - memory_only: bool = True, + self, + output_name: str = None, + cores: List[str] = None, + names: List[str] = None, + compress: bool = None, + memory_only: bool = True, ): """ Copies a list of cores (or all cores) from one matrix file to another one diff --git a/aequilibrae/paths/network_skimming.py b/aequilibrae/paths/network_skimming.py index 9745fa52e..279518e42 100644 --- a/aequilibrae/paths/network_skimming.py +++ b/aequilibrae/paths/network_skimming.py @@ -22,9 +22,6 @@ if pyqt: from PyQt5.QtCore import pyqtSignal -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - sys.dont_write_bytecode = True diff --git a/aequilibrae/paths/traffic_assignment.py b/aequilibrae/paths/traffic_assignment.py index a83b120d7..e24f16709 100644 --- a/aequilibrae/paths/traffic_assignment.py +++ b/aequilibrae/paths/traffic_assignment.py @@ -1,4 +1,3 @@ -import importlib.util as iutil import logging import socket import sqlite3 @@ -23,9 +22,6 @@ from aequilibrae.paths.vdf import VDF, all_vdf_functions from aequilibrae.project.database_connection import database_connection -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - class AssignmentBase(ABC): def __init__(self, project=None): From 560728fbc71b92278b41a571009f0bfaf184720d Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 16:23:29 +1000 Subject: [PATCH 03/33] . --- aequilibrae/matrix/aequilibrae_matrix.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 99d1431fa..8e5c471fd 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd +import openmatrix as omx from scipy.sparse import coo_matrix # Checks if we can display OMX From 608141ac8932b3c2bee10f420149ed5975bcedc7 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 16:28:33 +1000 Subject: [PATCH 04/33] . --- aequilibrae/paths/traffic_assignment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aequilibrae/paths/traffic_assignment.py b/aequilibrae/paths/traffic_assignment.py index e24f16709..f139873d5 100644 --- a/aequilibrae/paths/traffic_assignment.py +++ b/aequilibrae/paths/traffic_assignment.py @@ -665,7 +665,6 @@ def save_skims(self, matrix_name: str, which_ones="final", format="omx", project mat_format = format.lower() if mat_format not in ["omx", "aem"]: raise ValueError("Matrix needs to be either OMX or native AequilibraE") - if mat_format == "omx" and not has_omx: raise ImportError("OpenMatrix is not available on your system") if not project: From 99fc49d2948cc8820e69891730a1e0a6fc684039 Mon Sep 17 00:00:00 2001 From: pveigadecamargo Date: Tue, 6 Feb 2024 16:30:53 +1000 Subject: [PATCH 05/33] . 
--- .../distribution/gravity_application.py | 14 +++--- aequilibrae/matrix/aequilibrae_matrix.py | 46 +++++++++---------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/aequilibrae/distribution/gravity_application.py b/aequilibrae/distribution/gravity_application.py index 4eefadb89..2d07fae65 100644 --- a/aequilibrae/distribution/gravity_application.py +++ b/aequilibrae/distribution/gravity_application.py @@ -316,15 +316,15 @@ def __apply_function(self): elif self.model.function == "POWER": # self.output.matrices[self.core_name][i, :] = (np.power(self.impedance.matrix_view[i, :, 0], - self.model.alpha) * p * a)[:] self.output.matrix_view[i, :] = (np.power(self.impedance.matrix_view[i, :], -self.model.alpha) * p * a)[ - : - ] + : + ] elif self.model.function == "GAMMA": self.output.matrix_view[i, :] = ( - np.power(self.impedance.matrix_view[i, :], self.model.alpha) - * np.exp(-self.model.beta * self.impedance.matrix_view[i, :]) - * p - * a - )[:] + np.power(self.impedance.matrix_view[i, :], self.model.alpha) + * np.exp(-self.model.beta * self.impedance.matrix_view[i, :]) + * p + * a + )[:] # Deals with infinite and NaNs infinite = np.isinf(self.output.matrix_view[:, :]).astype(int) diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 8e5c471fd..18817ee08 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -154,14 +154,14 @@ def __save_as(self, file_name: str, cores: List[str]): omx_mat.close() def create_empty( - self, - file_name: str = None, - zones: int = None, - matrix_names: List[str] = None, - data_type: np.dtype = np.float64, - index_names: List[str] = None, - compressed: bool = False, - memory_only: bool = True, + self, + file_name: str = None, + zones: int = None, + matrix_names: List[str] = None, + data_type: np.dtype = np.float64, + index_names: List[str] = None, + compressed: bool = False, + memory_only: bool = True, ): """ Creates an empty matrix in the AequilibraE format @@ -280,14 +280,14 @@ def get_matrix(self, core: str, copy=False) -> np.ndarray: return mat def create_from_omx( - self, - file_path: str, - omx_path: str, - cores: List[str] = None, - mappings: List[str] = None, - robust: bool = True, - compressed: bool = False, - memory_only: bool = True, + self, + file_path: str, + omx_path: str, + cores: List[str] = None, + mappings: List[str] = None, + robust: bool = True, + compressed: bool = False, + memory_only: bool = True, ) -> None: """ Creates an AequilibraeMatrix from an original OpenMatrix @@ -903,15 +903,15 @@ def computational_view(self, core_list: List[str] = None): self.matrix_view = self.matrices[:, :, idx1] elif len(core_list) > 1: idx2 = self.names.index(core_list[-1]) - self.matrix_view = self.matrices[:, :, idx1: idx2 + 1] + self.matrix_view = self.matrices[:, :, idx1 : idx2 + 1] def copy( - self, - output_name: str = None, - cores: List[str] = None, - names: List[str] = None, - compress: bool = None, - memory_only: bool = True, + self, + output_name: str = None, + cores: List[str] = None, + names: List[str] = None, + compress: bool = None, + memory_only: bool = True, ): """ Copies a list of cores (or all cores) from one matrix file to another one From 0e668a9bd68a56c7ee2c0645a0cfb2325fa3639c Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 6 Feb 2024 16:33:41 +1000 Subject: [PATCH 06/33] Scratch parquet work --- tests/aequilibrae/paths/test_route_choice.py | 61 ++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git 
a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 0c2a9447c..a3cb6d197 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -116,6 +116,67 @@ def test_route_choice_exceptions(self): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) + def test_debug(self): + import pyarrow as pa + import pyarrow.parquet as pq + import matplotlib.pyplot as plt + + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + # nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + nodes = [(1, x) for x in self.graph.centroids if x != 1] + + max_routes = 20 + results: dict[ + tuple[int, int], # OD keys + list[ # routes, length = max_routes + tuple[int, ...] # individual paths, length variable + ] + ] + + self.graph.centroids + + # Table of origins + results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) + batch = pa.RecordBatch.from_arrays([pa.array(v) for v in results.values()], names=[str(k[1]) for k in results.keys()]) + table = pa.Table.from_batches([batch] * 5) + + lengths = [len(r) for k, v in results.items() for r in v] + # plt.hist(lengths) + # plt.show(block=False) + + ty = pa.map_( + pa.uint32(), # Destination + pa.list_( # route set + pa.list_(pa.uint32()), # path + # list_size=max_routes # We could make this fixed length, not sure if it has benefits? + ) + ) + + # Route map contains a column (a list below) for each origin. + # Each column is a map of (key, value) pairs where key is the destination and value is a list of paths + # paths are lists of uint32s. We don't need int64 since they'll never contain negative (invalid) link IDs + route_map = pa.array([ + [(k[1], v) for k, v in results.items()], # Real example, TODO: make coercion easier + [(k[1] * 100, v[::-1]) for k, v in results.items()] # different one just for the sake of it + ], type=ty) + + struct = pa.struct([ + pa.field("column index", pa.list_(pa.int32())), # List origin IDs at index they appear in the above ^^^ + pa.field("route map", pa.list_(route_map.type), nullable=False), # Not sure why this has to be a pa.list_(route_map.type) but just route_map.type doesn't work + ]) + + route_choice = pa.array([{ + "column index": [1, 100], # Here we encode the origin ID to index map separately. 
I couldn't figure out how to nest MapArrays/I'm not sure its needed + "routes map": route_map + }], type=struct) # Because the thing struct really stores lists of fields we could store multiple route choices in the same table + + table = pa.Table.from_pydict({'route choice': route_choice}) # Need to wrap the whole thing in a table to keep arrow happy + pq.write_table(table, "table.parquet") # Pretty sure with this structure writing a partitioned data set doesn't work + + # breakpoint() + assert False + def generate_line_strings(project, graph, results): """Debug method""" From 85fdb12123a2715b89730033fa4254402752f61a Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 7 Feb 2024 16:07:30 +1000 Subject: [PATCH 07/33] Better structuring for the table --- tests/aequilibrae/paths/test_route_choice.py | 43 +++++++++----------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a3cb6d197..a7d2cc614 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -131,14 +131,16 @@ def test_debug(self): tuple[int, int], # OD keys list[ # routes, length = max_routes tuple[int, ...] # individual paths, length variable - ] + ], ] self.graph.centroids # Table of origins results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) - batch = pa.RecordBatch.from_arrays([pa.array(v) for v in results.values()], names=[str(k[1]) for k in results.keys()]) + batch = pa.RecordBatch.from_arrays( + [pa.array(v) for v in results.values()], names=[str(k[1]) for k in results.keys()] + ) table = pa.Table.from_batches([batch] * 5) lengths = [len(r) for k, v in results.items() for r in v] @@ -150,29 +152,24 @@ def test_debug(self): pa.list_( # route set pa.list_(pa.uint32()), # path # list_size=max_routes # We could make this fixed length, not sure if it has benefits? - ) + ), ) - # Route map contains a column (a list below) for each origin. - # Each column is a map of (key, value) pairs where key is the destination and value is a list of paths - # paths are lists of uint32s. We don't need int64 since they'll never contain negative (invalid) link IDs - route_map = pa.array([ - [(k[1], v) for k, v in results.items()], # Real example, TODO: make coercion easier - [(k[1] * 100, v[::-1]) for k, v in results.items()] # different one just for the sake of it - ], type=ty) - - struct = pa.struct([ - pa.field("column index", pa.list_(pa.int32())), # List origin IDs at index they appear in the above ^^^ - pa.field("route map", pa.list_(route_map.type), nullable=False), # Not sure why this has to be a pa.list_(route_map.type) but just route_map.type doesn't work - ]) - - route_choice = pa.array([{ - "column index": [1, 100], # Here we encode the origin ID to index map separately. 
I couldn't figure out how to nest MapArrays/I'm not sure its needed - "routes map": route_map - }], type=struct) # Because the thing struct really stores lists of fields we could store multiple route choices in the same table - - table = pa.Table.from_pydict({'route choice': route_choice}) # Need to wrap the whole thing in a table to keep arrow happy - pq.write_table(table, "table.parquet") # Pretty sure with this structure writing a partitioned data set doesn't work + # Each element in this array corresponds to a row in the table + map1 = pa.array( + [ + [(k[1], v) for k, v in results.items()], # Real example, TODO: make coercion easier + [(k[1] * 100, v[::-1]) for k, v in results.items()], # different one just for the sake of it + ], + type=ty, + ) + + # Each row has the origin ID that it is for, same ordering as map1 + table = pa.Table.from_arrays([pa.array([1, 100]), map1], names=["origin id", "map"]) + pq.write_to_dataset(table, root_path="testing_parquet", partition_cols=["origin id"]) + + # We can read back in only select rows using the filter + dataset = pq.ParquetDataset("testing_parquet", filters=[("origin id", "=", 100)]) # breakpoint() assert False From eecaaa3bc92767d67614167ff2df266f91410f34 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 12 Feb 2024 14:27:29 +1000 Subject: [PATCH 08/33] Add optional immediately disk saving --- aequilibrae/paths/route_choice.pxd | 16 ++ aequilibrae/paths/route_choice.pyx | 272 ++++++++++++++----- setup.py | 7 + tests/aequilibrae/paths/test_route_choice.py | 75 ++--- 4 files changed, 254 insertions(+), 116 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 951a82b2b..40d0417c4 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -6,6 +6,12 @@ from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair from libcpp cimport bool +cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. +cimport pyarrow as pa +cimport pyarrow.lib as libpa +cimport pyarrow._dataset_parquet as pq +from libcpp.memory cimport shared_ptr + # std::linear_congruential_engine is not available in the Cython libcpp.random shim. 
We'll import it ourselves # from libcpp.random cimport minstd_rand from libc.stdint cimport uint_fast32_t, uint_fast64_t @@ -143,3 +149,13 @@ cdef class RouteChoiceSet: long long [:] _thread_reached_first, unsigned int seed ) noexcept nogil + + @staticmethod + cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets) + + +cdef class Checkpoint: + cdef: + public object where + public object schema + public object partition_cols diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index dfcb09ec6..93d635532 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -57,6 +57,7 @@ from aequilibrae import Graph from libc.math cimport INFINITY from libc.string cimport memcpy from libc.limits cimport UINT_MAX +from libc.stdlib cimport abort from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map @@ -65,7 +66,20 @@ from cython.operator cimport dereference as deref from cython.parallel cimport parallel, prange, threadid import numpy as np +import pyarrow as pa from typing import List, Tuple +import itertools +import pathlib +import logging + +cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. +cimport pyarrow as pa +cimport pyarrow.lib as libpa +import pyarrow.dataset +import pyarrow.parquet as pq +from libcpp.memory cimport shared_ptr + +from libc.stdio cimport fprintf, printf, stderr # It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' @@ -77,6 +91,14 @@ cdef class RouteChoiceSet: Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data' """ + route_set_dtype = pa.list_(pa.uint32()) + + schema = pa.schema([ + pa.field("origin id", pa.uint32(), nullable=False), + pa.field("destination id", pa.uint32(), nullable=False), + pa.field("route set", route_set_dtype, nullable=False), + ]) + def __cinit__(self): """C level init. For C memory allocation and initialisation. Called exactly once per object.""" pass @@ -132,7 +154,16 @@ cdef class RouteChoiceSet: @cython.wraparound(False) @cython.embedsignature(True) @cython.initializedcheck(False) - def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1, a_star: bool = True): + def batched( + self, + ods: List[Tuple[int, int]], + max_routes: int = 0, + max_depth: int = 0, + seed: int = 0, + cores: int = 1, + a_star: bool = True, + where: Optional[str] = None + ): """ Compute the a route set for a list of OD pairs. @@ -146,27 +177,19 @@ cdef class RouteChoiceSet: **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``1``. + **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. :Returns: **route sets** (:obj:`dict[tuple[int, int], list[tuple[int, ...]]]`): Returns a list of unique tuples of compact link IDs for - each OD pair provided (as keys). Represents paths from - ``origin`` to ``destination``. + each OD pair provided (as keys). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. 
""" cdef: - vector[pair[long long, long long]] c_ods = ods - - # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory - double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float) - long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) - long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) - long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (cores, self.b_nodes_view.shape[0])).copy() - - # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the - # interface. - long long [:, :] _reached_first_matrix - - vector[RouteSet_t *] *results = new vector[RouteSet_t *](len(ods)) long long origin_index, dest_index, i + unsigned int c_max_routes = max_routes + unsigned int c_max_depth = max_depth + unsigned int c_seed = seed + unsigned int c_cores = cores + long long o, d if max_routes == 0 and max_depth == 0: raise ValueError("Either `max_routes` or `max_depth` must be > 0") @@ -174,19 +197,27 @@ cdef class RouteChoiceSet: if max_routes < 0 or max_depth < 0 or cores < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - cdef: - unsigned int c_max_routes = max_routes - unsigned int c_max_depth = max_depth - unsigned int c_seed = seed - unsigned int c_cores = cores - long long o, d - for o, d in ods: if self.nodes_to_indices_view[o] == -1: raise ValueError(f"Origin {o} is not present within the compact graph") if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") + cdef: + vector[pair[long long, long long]] c_ods + + # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory + double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float) + long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) + long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) + long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (cores, self.b_nodes_view.shape[0])).copy() + + # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the + # interface. 
+ long long [:, :] _reached_first_matrix + + vector[RouteSet_t *] *results + self.a_star = a_star if self.a_star: @@ -194,56 +225,71 @@ cdef class RouteChoiceSet: else: _reached_first_matrix = np.zeros((cores, self.num_nodes), dtype=np.int64) - with nogil, parallel(num_threads=c_cores): - for i in prange(c_ods.size()): - origin_index = self.nodes_to_indices_view[c_ods[i].first] - dest_index = self.nodes_to_indices_view[c_ods[i].second] - - if self.block_flows_through_centroids: - blocking_centroid_flows( - 0, # Always blocking + if where is not None: + checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"]) + batches = list(Checkpoint.batches(ods)) + results = new vector[RouteSet_t *](max(len(batch) for batch in batches)) + else: + batches = [ods] + results = new vector[RouteSet_t *](len(ods)) + + for batch in batches: + results.resize(len(batch)) # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed + c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer + with nogil, parallel(num_threads=c_cores): + for i in prange(c_ods.size()): + origin_index = self.nodes_to_indices_view[c_ods[i].first] + dest_index = self.nodes_to_indices_view[c_ods[i].second] + + if self.block_flows_through_centroids: + blocking_centroid_flows( + 0, # Always blocking + origin_index, + self.zones, + self.graph_fs_view, + b_nodes_matrix[threadid()], + self.b_nodes_view, + ) + + deref(results)[i] = RouteChoiceSet.generate_route_set( + self, origin_index, - self.zones, - self.graph_fs_view, + dest_index, + c_max_routes, + c_max_depth, + cost_matrix[threadid()], + predecessors_matrix[threadid()], + conn_matrix[threadid()], b_nodes_matrix[threadid()], - self.b_nodes_view, + _reached_first_matrix[threadid()], + c_seed, ) - deref(results)[i] = RouteChoiceSet.generate_route_set( - self, - origin_index, - dest_index, - c_max_routes, - c_max_depth, - cost_matrix[threadid()], - predecessors_matrix[threadid()], - conn_matrix[threadid()], - b_nodes_matrix[threadid()], - _reached_first_matrix[threadid()], - c_seed, - ) - - if self.block_flows_through_centroids: - blocking_centroid_flows( - 1, # Always unblocking - origin_index, - self.zones, - self.graph_fs_view, - b_nodes_matrix[threadid()], - self.b_nodes_view, - ) + if self.block_flows_through_centroids: + blocking_centroid_flows( + 1, # Always unblocking + origin_index, + self.zones, + self.graph_fs_view, + b_nodes_matrix[threadid()], + self.b_nodes_view, + ) + + table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results))) + + # Once we've made the table all results have been copied into some pyarrow structure, we can free our internal structures + for result in deref(results): + for route in deref(result): + del route + + if where is None: # There was only one batch anyway + return table - # Build results into python objects using Cythons auto-conversion from vector[long long] to list then to tuple (for set operations) - res = [] - for result in deref(results): - links = [] - for route in deref(result): - links.append(tuple(deref(route))) - del route - res.append(links) + checkpoint.write(table) - del results - return dict(zip(ods, res)) + del table + + return @cython.initializedcheck(False) cdef void path_find( @@ -385,3 +431,95 @@ cdef class RouteChoiceSet: del banned return route_set + + @staticmethod + cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] 
&route_sets): + cdef: + shared_ptr[libpa.CArray] paths + shared_ptr[libpa.CArray] offsets + libpa.CMemoryPool *pool = libpa.c_get_memory_pool() + libpa.CUInt32Builder *path_builder = new libpa.CUInt32Builder(pool) + libpa.CInt32Builder *offset_builder = new libpa.CInt32Builder(pool) # Must be Int32 *not* UInt32 + libpa.CUInt32Builder *o_col = new libpa.CUInt32Builder(pool) + libpa.CUInt32Builder *d_col = new libpa.CUInt32Builder(pool) + vector[shared_ptr[libpa.CArray]] columns + shared_ptr[libpa.CDataType] route_set_dtype = libpa.pyarrow_unwrap_data_type(RouteChoiceSet.route_set_dtype) + + libpa.CResult[shared_ptr[libpa.CArray]] route_set_results + + int offset = 0 + + columns.emplace_back(shared_ptr[libpa.CArray]()) # Origins + columns.emplace_back(shared_ptr[libpa.CArray]()) # Destination + columns.emplace_back(shared_ptr[libpa.CArray]()) # Route set + + for i in range(ods.size()): + route_set = route_sets[i] + + # Instead of construction a "list of lists" style object for storing the route sets we instead will construct one big array of link ids + # with a corresponding offsets array that indicates where each new row (path) starts. + for route in deref(route_set): + o_col.Append(ods[i].first) + d_col.Append(ods[i].second) + offset_builder.Append(offset) + # TODO: Pyarrows Cython API is incredibly lacking, it's just functional and doesn't include all the nice things the C++ API has + # One such thing its missing the AppendValues, which can add whole iterators at once in a much smarter fashion. + # We'll have to reimport/extern the classes we use if we want to avoid the below + for link in deref(route): + path_builder.Append(link) + + offset += route.size() + + path_builder.Finish(&paths) + offset_builder.Append(offset) # Mark the end of the array in offsets + offset_builder.Finish(&offsets) + + route_set_results = libpa.CListArray.FromArraysAndType(route_set_dtype, deref(offsets.get()), deref(paths.get()), pool, shared_ptr[libpa.CBuffer]()) + if not route_set_results.ok(): + fprintf(stderr, "The Routes Sets Aren't Alright: %s\n", route_set_results.status().ToString().c_str()) + abort() + + o_col.Finish(&columns[0]) + d_col.Finish(&columns[1]) + columns[2] = deref(route_set_results) + + cdef shared_ptr[libpa.CTable] table = libpa.CTable.MakeFromArrays(libpa.pyarrow_unwrap_schema(RouteChoiceSet.schema), columns) + + del path_builder + del offset_builder + del o_col + del d_col + return table + + +@cython.embedsignature(True) +cdef class Checkpoint: + """ + A small wrapper class to write a dataset partition by partition + """ + + def __init__(self, where, schema, partition_cols = None): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + self.where = pathlib.Path(where) + self.schema = schema + self.partition_cols = partition_cols + + def write(self, table): + logger = logging.getLogger("aequilibrae") + pq.write_to_dataset( + table, + self.where, + partitioning=self.partition_cols, + partitioning_flavor="hive", + schema=self.schema, + use_threads=True, + existing_data_behavior="overwrite_or_ignore", + file_visitor=lambda written_file: logger.info(f"Wrote partition dataset at {written_file.path}") + ) + + def read_dataset(self): + return pa.dataset.dataset(self.where, format="parquet", partitioning=pa.dataset.HivePartitioning(self.schema)) + + @staticmethod + def batches(ods: List[Tuple[int, int]]): + return (list(g) for k, g in itertools.groupby(sorted(ods), key=lambda x: x[0])) diff --git a/setup.py b/setup.py index 852b82dff..3fe13c06e 
100644 --- a/setup.py +++ b/setup.py @@ -12,10 +12,14 @@ exec(f.read()) include_dirs = [np.get_include()] +libraries = [] +library_dirs = [] if iutil.find_spec("pyarrow") is not None: import pyarrow as pa include_dirs.append(pa.get_include()) + libraries.extend(pa.get_libraries()) + library_dirs.extend(pa.get_library_dirs()) is_win = "WINDOWS" in platform.platform().upper() is_mac = any(e in platform.platform().upper() for e in ["MACOS", "DARWIN"]) @@ -62,8 +66,11 @@ extra_link_args=link_args, define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], include_dirs=include_dirs, + libraries=libraries, + library_dirs=library_dirs, language="c++", ) + ext_mod_graph_building = Extension( "aequilibrae.paths.graph_building", [join("aequilibrae", "paths", "graph_building.pyx")], diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a7d2cc614..1a7b89508 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -6,6 +6,7 @@ from unittest import TestCase import pandas as pd import numpy as np +import pyarrow as pa from aequilibrae import Graph, Project from aequilibrae.paths.route_choice import RouteChoiceSet @@ -116,64 +117,40 @@ def test_route_choice_exceptions(self): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) - def test_debug(self): - import pyarrow as pa - import pyarrow.parquet as pq - import matplotlib.pyplot as plt + # def test_debug2(self): + # import pyarrow as pa + # import pyarrow.parquet as pq + # import matplotlib.pyplot as plt - np.random.seed(0) - rc = RouteChoiceSet(self.graph) - # nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - nodes = [(1, x) for x in self.graph.centroids if x != 1] + # np.random.seed(0) + # rc = RouteChoiceSet(self.graph) + # nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - max_routes = 20 - results: dict[ - tuple[int, int], # OD keys - list[ # routes, length = max_routes - tuple[int, ...] # individual paths, length variable - ], - ] + # max_routes = 20 - self.graph.centroids + # breakpoint() + # # Table of origins + # table = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) - # Table of origins - results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) - batch = pa.RecordBatch.from_arrays( - [pa.array(v) for v in results.values()], names=[str(k[1]) for k in results.keys()] - ) - table = pa.Table.from_batches([batch] * 5) - - lengths = [len(r) for k, v in results.items() for r in v] - # plt.hist(lengths) - # plt.show(block=False) - - ty = pa.map_( - pa.uint32(), # Destination - pa.list_( # route set - pa.list_(pa.uint32()), # path - # list_size=max_routes # We could make this fixed length, not sure if it has benefits? 
- ), - ) + # assert False - # Each element in this array corresponds to a row in the table - map1 = pa.array( - [ - [(k[1], v) for k, v in results.items()], # Real example, TODO: make coercion easier - [(k[1] * 100, v[::-1]) for k, v in results.items()], # different one just for the sake of it - ], - type=ty, - ) + def test_round_trip(self): + np.random.seed(1000) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + + max_routes = 20 - # Each row has the origin ID that it is for, same ordering as map1 - table = pa.Table.from_arrays([pa.array([1, 100]), map1], names=["origin id", "map"]) - pq.write_to_dataset(table, root_path="testing_parquet", partition_cols=["origin id"]) + path = join(self.project.project_base_path, "batched results") + table = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) + rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1, where=path) - # We can read back in only select rows using the filter - dataset = pq.ParquetDataset("testing_parquet", filters=[("origin id", "=", 100)]) + dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) + new_table = dataset.to_table().to_pandas().sort_values(by=["origin id", "destination id"])[["origin id", "destination id", "route set"]].reset_index(drop=True) - # breakpoint() - assert False + table = table.to_pandas().sort_values(by=["origin id", "destination id"]).reset_index(drop=True) + pd.testing.assert_frame_equal(table, new_table) def generate_line_strings(project, graph, results): """Debug method""" From 945e8a93b0d32b88b2a98856da5e258e49636e9a Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 12 Feb 2024 15:56:01 +1000 Subject: [PATCH 09/33] Test fixes --- aequilibrae/paths/route_choice.pyx | 13 +++++++------ tests/aequilibrae/paths/test_route_choice.py | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 93d635532..e07fb26c2 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -147,7 +147,7 @@ cdef class RouteChoiceSet: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. Represents paths from ``origin`` to ``destination``. """ - return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed, a_star=a_star)[(origin, destination)] + return [tuple(x) for x in self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed, a_star=a_star).column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @@ -184,11 +184,6 @@ cdef class RouteChoiceSet: each OD pair provided (as keys). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. 
""" cdef: - long long origin_index, dest_index, i - unsigned int c_max_routes = max_routes - unsigned int c_max_depth = max_depth - unsigned int c_seed = seed - unsigned int c_cores = cores long long o, d if max_routes == 0 and max_depth == 0: @@ -204,6 +199,12 @@ cdef class RouteChoiceSet: raise ValueError(f"Destination {d} is not present within the compact graph") cdef: + long long origin_index, dest_index, i + unsigned int c_max_routes = max_routes + unsigned int c_max_depth = max_depth + unsigned int c_seed = seed + unsigned int c_cores = cores + vector[pair[long long, long long]] c_ods # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 1a7b89508..a43d7a0b2 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -69,8 +69,8 @@ def test_route_choice_empty_path(self): rc = RouteChoiceSet(self.graph) a = 1 - self.assertEqual( - rc.batched([(a, a)], max_routes=0, max_depth=3), {(a, a): []}, "Route set from self to self should be empty" + self.assertFalse( + rc.batched([(a, a)], max_routes=0, max_depth=3), "Route set from self to self should be empty" ) def test_route_choice_blocking_centroids(self): @@ -96,11 +96,12 @@ def test_route_choice_batched(self): max_routes = 20 results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) - self.assertEqual(len(results), len(nodes), "Requested number of route sets not returned") + gb = results.to_pandas().groupby(by="origin id") + self.assertEqual(len(gb), len(nodes), "Requested number of route sets not returned") - for od, route_set in results.items(): - self.assertEqual(len(route_set), len(set(route_set)), f"Duplicate routes returned for {od}") - self.assertEqual(len(route_set), max_routes, f"Requested number of routes not returned for {od}") + for _, row in gb: + self.assertFalse(any(row["route set"].duplicated()), f"Duplicate routes returned for {row['origin id']}") + self.assertEqual(len(row["route set"]), max_routes, f"Requested number of routes not returned for {row['origin id']}") def test_route_choice_exceptions(self): rc = RouteChoiceSet(self.graph) From e3ea0c67f0108d09645e52fa514e23edf2e12b71 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 12 Feb 2024 16:38:38 +1000 Subject: [PATCH 10/33] Update CI workflows In what I consider a bad move, the PyPi wheel installation of pyarrow requires modifying the installation environment to create symlinks for the linker. 
--- .github/workflows/build_linux.yml | 1 + .github/workflows/build_mac.yml | 1 + .github/workflows/debug_tests.yml | 1 + .github/workflows/documentation.yml | 1 + .github/workflows/test_linux_with_coverage.yml | 1 + .github/workflows/unit_tests.yml | 1 + 6 files changed, 6 insertions(+) diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml index e0ac2b110..1ea834b87 100644 --- a/.github/workflows/build_linux.yml +++ b/.github/workflows/build_linux.yml @@ -17,6 +17,7 @@ jobs: - name: Install packages run: | python -m pip install --upgrade pip wheel cffi setuptools twine + python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Build manylinux Python wheels uses: RalfG/python-wheels-manylinux-build@v0.7.1 diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml index 401204b28..4ae1de2b5 100644 --- a/.github/workflows/build_mac.yml +++ b/.github/workflows/build_mac.yml @@ -28,6 +28,7 @@ jobs: python -m pip install --upgrade pip setuptools wheel twine pip install -r requirements.txt pip install -r requirements_additional.txt + python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Build run: | diff --git a/.github/workflows/debug_tests.yml b/.github/workflows/debug_tests.yml index 15cae28c1..8197b13af 100644 --- a/.github/workflows/debug_tests.yml +++ b/.github/workflows/debug_tests.yml @@ -15,6 +15,7 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt + python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Download spatialite on Linux run: | diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 065c9ffd4..13505326f 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -27,6 +27,7 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r docs/requirements-docs.txt + python -c "import pyarrow; pyarrow.create_library_symlinks()" sudo apt update sudo apt install -y --fix-missing libsqlite3-mod-spatialite libspatialite-dev pandoc sudo ln -s /usr/lib/x86_64-linux-gnu/mod_spatialite.so /usr/lib/x86_64-linux-gnu/mod_spatialite diff --git a/.github/workflows/test_linux_with_coverage.yml b/.github/workflows/test_linux_with_coverage.yml index 38b5ae19f..f9d87903b 100644 --- a/.github/workflows/test_linux_with_coverage.yml +++ b/.github/workflows/test_linux_with_coverage.yml @@ -23,6 +23,7 @@ jobs: pip3 install spatialite pip3 install pytest pip3 install pytest-cov + python3 -c "import pyarrow; pyarrow.create_library_symlinks()" sudo ln -s /usr/lib/x86_64-linux-gnu/mod_spatialite.so /usr/lib/x86_64-linux-gnu/mod_spatialite - name: Compile library run: | diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 5b6af38d9..e7c070fdc 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -19,6 +19,7 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt + python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Lint with ruff run: ruff . 
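Looping back to patch 08: make_table_from_results deliberately avoids a "list of lists" layout by emitting one flat array of link IDs plus an offsets array marking where each path starts, which is exactly Arrow's ListArray encoding. A pure-Python sketch of the same layout (sample values invented for illustration):

    import pyarrow as pa

    routes = [(1, 2, 3), (1, 4), (5,)]  # three paths of compact link IDs
    values = pa.array([link for route in routes for link in route], type=pa.uint32())
    offsets = pa.array([0, 3, 5, 6], type=pa.int32())  # row i spans values[offsets[i]:offsets[i + 1]]
    route_sets = pa.ListArray.from_arrays(offsets, values)
    assert route_sets.to_pylist() == [[1, 2, 3], [1, 4], [5]]

Note that the offsets array carries one more entry than there are rows, which is why the builder loop appends one final offset after finishing; Arrow also requires the offsets of a plain list type to be signed, matching the "Must be Int32 *not* UInt32" comment on the builder declaration.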
From 4c39a8bd076faed169df384bfd89b9b55d20d954 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 12 Feb 2024 16:41:36 +1000 Subject: [PATCH 11/33] Move pyarrow to a hard dependency for now --- requirements.txt | 3 ++- requirements_additional.txt | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index d22ed0b9d..5eec71fb0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ shapely pandas pyproj rtree -openmatrix \ No newline at end of file +openmatrix +pyarrow diff --git a/requirements_additional.txt b/requirements_additional.txt index 96f666a5e..2d07ca02a 100644 --- a/requirements_additional.txt +++ b/requirements_additional.txt @@ -1,2 +1 @@ -pyarrow -PyQt5 \ No newline at end of file +PyQt5 From f18d13cd65ac03c13f6a62e6396378c2c7eef39f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 12 Feb 2024 16:42:22 +1000 Subject: [PATCH 12/33] Linting --- tests/aequilibrae/paths/test_route_choice.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a43d7a0b2..03755f5ec 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -69,9 +69,7 @@ def test_route_choice_empty_path(self): rc = RouteChoiceSet(self.graph) a = 1 - self.assertFalse( - rc.batched([(a, a)], max_routes=0, max_depth=3), "Route set from self to self should be empty" - ) + self.assertFalse(rc.batched([(a, a)], max_routes=0, max_depth=3), "Route set from self to self should be empty") def test_route_choice_blocking_centroids(self): a, b = 1, 20 @@ -101,7 +99,9 @@ def test_route_choice_batched(self): for _, row in gb: self.assertFalse(any(row["route set"].duplicated()), f"Duplicate routes returned for {row['origin id']}") - self.assertEqual(len(row["route set"]), max_routes, f"Requested number of routes not returned for {row['origin id']}") + self.assertEqual( + len(row["route set"]), max_routes, f"Requested number of routes not returned for {row['origin id']}" + ) def test_route_choice_exceptions(self): rc = RouteChoiceSet(self.graph) @@ -147,12 +147,18 @@ def test_round_trip(self): rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1, where=path) dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) - new_table = dataset.to_table().to_pandas().sort_values(by=["origin id", "destination id"])[["origin id", "destination id", "route set"]].reset_index(drop=True) + new_table = ( + dataset.to_table() + .to_pandas() + .sort_values(by=["origin id", "destination id"])[["origin id", "destination id", "route set"]] + .reset_index(drop=True) + ) table = table.to_pandas().sort_values(by=["origin id", "destination id"]).reset_index(drop=True) pd.testing.assert_frame_equal(table, new_table) + def generate_line_strings(project, graph, results): """Debug method""" import geopandas as gpd From 9ad188b4aec9bccd393b807285f481429b5a6f47 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 12 Feb 2024 16:55:28 +1000 Subject: [PATCH 13/33] CI again --- .github/workflows/build_linux.yml | 2 +- .github/workflows/documentation.yml | 2 +- .github/workflows/unit_tests.yml | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml index 1ea834b87..51974e264 100644 --- a/.github/workflows/build_linux.yml +++ b/.github/workflows/build_linux.yml @@ -16,7 +16,7 @@ jobs: 
python-version: 3.9 - name: Install packages run: | - python -m pip install --upgrade pip wheel cffi setuptools twine + python -m pip install --upgrade pip wheel cffi setuptools twine pyarrow python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Build manylinux Python wheels diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 13505326f..09c008be6 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -27,7 +27,7 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r docs/requirements-docs.txt - python -c "import pyarrow; pyarrow.create_library_symlinks()" + sudo python -c "import pyarrow; pyarrow.create_library_symlinks()" sudo apt update sudo apt install -y --fix-missing libsqlite3-mod-spatialite libspatialite-dev pandoc sudo ln -s /usr/lib/x86_64-linux-gnu/mod_spatialite.so /usr/lib/x86_64-linux-gnu/mod_spatialite diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index e7c070fdc..3133ec982 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -48,6 +48,7 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt + python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Download spatialite on Linux if: matrix.os == 'ubuntu-latest' From 6c79a1794049ef6a6ac549b66f43b0ca14ce3a3d Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 13 Feb 2024 17:34:53 +1000 Subject: [PATCH 14/33] Warn about duplicate OD pairs, fix memory corruption, disable A* --- aequilibrae/paths/route_choice.pyx | 26 +++++++++++++------- tests/aequilibrae/paths/test_route_choice.py | 15 ++++++++++- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index e07fb26c2..e4d208776 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -71,6 +71,7 @@ from typing import List, Tuple import itertools import pathlib import logging +import warnings cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. 
cimport pyarrow as pa @@ -110,14 +111,17 @@ cdef class RouteChoiceSet: self.graph_fs_view = graph.compact_fs self.b_nodes_view = graph.compact_graph.b_node.values self.nodes_to_indices_view = graph.compact_nodes_to_indices - tmp = graph.lonlat_index.loc[graph.compact_all_nodes] - self.lat_view = tmp.lat.values - self.lon_view = tmp.lon.values + + # tmp = graph.lonlat_index.loc[graph.compact_all_nodes] + # self.lat_view = tmp.lat.values + # self.lon_view = tmp.lon.values + self.a_star = False + self.ids_graph_view = graph.compact_graph.id.values self.num_nodes = graph.compact_num_nodes self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - self.a_star = True + def __dealloc__(self): """ @@ -219,20 +223,24 @@ cdef class RouteChoiceSet: vector[RouteSet_t *] *results - self.a_star = a_star + # self.a_star = a_star if self.a_star: _reached_first_matrix = np.zeros((cores, 1), dtype=np.int64) # Dummy array to allow slicing else: - _reached_first_matrix = np.zeros((cores, self.num_nodes), dtype=np.int64) + _reached_first_matrix = np.zeros((cores, self.num_nodes + 1), dtype=np.int64) + + set_ods = set(ods) + if len(set_ods) != len(ods): + warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs") if where is not None: checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"]) - batches = list(Checkpoint.batches(ods)) + batches = list(Checkpoint.batches(set_ods)) results = new vector[RouteSet_t *](max(len(batch) for batch in batches)) else: - batches = [ods] - results = new vector[RouteSet_t *](len(ods)) + batches = [list(set_ods)] + results = new vector[RouteSet_t *](len(set_ods)) for batch in batches: results.resize(len(batch)) # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 03755f5ec..4b4417f89 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -92,7 +92,7 @@ def test_route_choice_batched(self): nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 - results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) + results = rc.batched(nodes, max_routes=max_routes, max_depth=10) gb = results.to_pandas().groupby(by="origin id") self.assertEqual(len(gb), len(nodes), "Requested number of route sets not returned") @@ -103,6 +103,19 @@ def test_route_choice_batched(self): len(row["route set"]), max_routes, f"Requested number of routes not returned for {row['origin id']}" ) + def test_route_choice_duplicates_batched(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [(1, 20)] * 5 + + max_routes = 20 + with self.assertWarns(UserWarning): + results = rc.batched(nodes, max_routes=max_routes, max_depth=10) + + gb = results.to_pandas().groupby(by="origin id") + self.assertEqual(len(gb), 1, "Duplicates not dropped") + + def test_route_choice_exceptions(self): rc = RouteChoiceSet(self.graph) args = [ From d3677b640039bb3b6216d7bb3d64dd852f6effb3 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 14 Feb 2024 15:23:44 +1000 Subject: [PATCH 15/33] Fix path order --- aequilibrae/paths/route_choice.pyx | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index e4d208776..a03fbad96 100644 --- 
a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -62,7 +62,7 @@ from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair -from cython.operator cimport dereference as deref +from cython.operator cimport dereference as deref, preincrement as inc from cython.parallel cimport parallel, prange, threadid import numpy as np @@ -236,7 +236,7 @@ cdef class RouteChoiceSet: if where is not None: checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"]) - batches = list(Checkpoint.batches(set_ods)) + batches = list(Checkpoint.batches(list(set_ods))) results = new vector[RouteSet_t *](max(len(batch) for batch in batches)) else: batches = [list(set_ods)] @@ -456,6 +456,8 @@ cdef class RouteChoiceSet: libpa.CResult[shared_ptr[libpa.CArray]] route_set_results + vector[long long].reverse_iterator riter + int offset = 0 columns.emplace_back(shared_ptr[libpa.CArray]()) # Origins @@ -474,8 +476,10 @@ cdef class RouteChoiceSet: # TODO: Pyarrows Cython API is incredibly lacking, it's just functional and doesn't include all the nice things the C++ API has # One such thing its missing the AppendValues, which can add whole iterators at once in a much smarter fashion. # We'll have to reimport/extern the classes we use if we want to avoid the below - for link in deref(route): - path_builder.Append(link) + riter = route.rbegin() + while riter != route.rend(): + path_builder.Append(deref(riter)) + inc(riter) offset += route.size() @@ -484,9 +488,6 @@ cdef class RouteChoiceSet: offset_builder.Finish(&offsets) route_set_results = libpa.CListArray.FromArraysAndType(route_set_dtype, deref(offsets.get()), deref(paths.get()), pool, shared_ptr[libpa.CBuffer]()) - if not route_set_results.ok(): - fprintf(stderr, "The Routes Sets Aren't Alright: %s\n", route_set_results.status().ToString().c_str()) - abort() o_col.Finish(&columns[0]) d_col.Finish(&columns[1]) From b5ed6a477758c02ceed5c791fc68aa069214a651 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 14 Feb 2024 17:12:19 +1000 Subject: [PATCH 16/33] Add link penalisation --- aequilibrae/paths/route_choice.pxd | 17 ++- aequilibrae/paths/route_choice.pyx | 122 +++++++++++++++---- tests/aequilibrae/paths/test_route_choice.py | 70 ++++++----- 3 files changed, 154 insertions(+), 55 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 40d0417c4..c5c1dfadc 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -136,7 +136,7 @@ cdef class RouteChoiceSet: long long [:] thread_reached_first ) noexcept nogil - cdef RouteSet_t *generate_route_set( + cdef RouteSet_t *bfsle( RouteChoiceSet self, long origin_index, long dest_index, @@ -150,6 +150,21 @@ cdef class RouteChoiceSet: unsigned int seed ) noexcept nogil + cdef RouteSet_t *link_penalisation( + RouteChoiceSet self, + long origin_index, + long dest_index, + unsigned int max_routes, + unsigned int max_depth, + double [:] thread_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + long long [:] thread_b_nodes, + long long [:] _thread_reached_first, + double penatly, + unsigned int seed + ) noexcept nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index a03fbad96..6f38522e9 100644 --- 
a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -131,27 +131,24 @@ cdef class RouteChoiceSet: pass @cython.embedsignature(True) - def run(self, origin: int, destination: int, max_routes: int = 0, max_depth: int = 0, seed: int = 0, a_star: bool = True): + def run(self, origin: int, destination: int, *args, **kwargs): """ Compute the a route set for a single OD pair. Often the returned list's length is ``max_routes``, however, it may be limited by ``max_depth`` or if all unique possible paths have been found then a smaller set will be returned. - Thin wrapper around ``RouteChoiceSet.batched``. + Thin wrapper around ``RouteChoiceSet.batched``. Additional arguments are forwarded to ``RouteChoiceSet.batched``. :Arguments: **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a centroid. **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to choose a centroid. - **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. - **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited. - **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. :Returns: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. Represents paths from ``origin`` to ``destination``. """ - return [tuple(x) for x in self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed, a_star=a_star).column("route set").to_pylist()] + return [tuple(x) for x in self.batched([(origin, destination)], *args, **kwargs).column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @@ -166,7 +163,9 @@ cdef class RouteChoiceSet: seed: int = 0, cores: int = 1, a_star: bool = True, - where: Optional[str] = None + bfsle: bool = True, + penalty: float = 0.0, + where: Optional[str] = None, ): """ Compute the a route set for a list of OD pairs. @@ -178,9 +177,12 @@ cdef class RouteChoiceSet: **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination node ID must be present within compact graph. Recommended to choose a centroids. **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. - **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited. + **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. + Must be non-negative. Default of ``0`` for unlimited. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``1``. + **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. + **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. 
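        A hypothetical usage sketch (the surrounding names are assumed for
        illustration and are not part of this patch):

        .. code-block:: python

            rc = RouteChoiceSet(graph)  # graph: an assembled aequilibrae Graph
            table = rc.batched([(1, 20), (3, 40)], max_routes=10, max_depth=0, cores=4)
            df = table.to_pandas()  # columns: "origin id", "destination id", "route set"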
:Returns: @@ -196,6 +198,12 @@ cdef class RouteChoiceSet: if max_routes < 0 or max_depth < 0 or cores < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") + if penalty != 0.0 and bfsle: + raise ValueError("Link penalisation (`penatly` > 1.0) and `bfsle` cannot be enabled at once") + + if not bfsle and penalty <= 1.0: + raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") + for o, d in ods: if self.nodes_to_indices_view[o] == -1: raise ValueError(f"Origin {o} is not present within the compact graph") @@ -260,19 +268,35 @@ cdef class RouteChoiceSet: self.b_nodes_view, ) - deref(results)[i] = RouteChoiceSet.generate_route_set( - self, - origin_index, - dest_index, - c_max_routes, - c_max_depth, - cost_matrix[threadid()], - predecessors_matrix[threadid()], - conn_matrix[threadid()], - b_nodes_matrix[threadid()], - _reached_first_matrix[threadid()], - c_seed, - ) + if bfsle: + deref(results)[i] = RouteChoiceSet.bfsle( + self, + origin_index, + dest_index, + c_max_routes, + c_max_depth, + cost_matrix[threadid()], + predecessors_matrix[threadid()], + conn_matrix[threadid()], + b_nodes_matrix[threadid()], + _reached_first_matrix[threadid()], + c_seed, + ) + else: + deref(results)[i] = RouteChoiceSet.link_penalisation( + self, + origin_index, + dest_index, + c_max_routes, + c_max_depth, + cost_matrix[threadid()], + predecessors_matrix[threadid()], + conn_matrix[threadid()], + b_nodes_matrix[threadid()], + _reached_first_matrix[threadid()], + penalty, + c_seed, + ) if self.block_flows_through_centroids: blocking_centroid_flows( @@ -344,7 +368,7 @@ cdef class RouteChoiceSet: @cython.wraparound(False) @cython.embedsignature(True) @cython.initializedcheck(False) - cdef RouteSet_t *generate_route_set( + cdef RouteSet_t *bfsle( RouteChoiceSet self, long origin_index, long dest_index, @@ -441,6 +465,60 @@ cdef class RouteChoiceSet: return route_set + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + cdef RouteSet_t *link_penalisation( + RouteChoiceSet self, + long origin_index, + long dest_index, + unsigned int max_routes, + unsigned int max_depth, + double [:] thread_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + long long [:] thread_b_nodes, + long long [:] _thread_reached_first, + double penatly, + unsigned int seed + ) noexcept nogil: + cdef: + RouteSet_t *route_set + + # Scratch objects + vector[long long] *vec + long long p, connector + + max_routes = max_routes if max_routes != 0 else UINT_MAX + max_depth = max_depth if max_depth != 0 else UINT_MAX + route_set = new RouteSet_t() + memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) + + for depth in range(max_depth): + if route_set.size() >= max_routes: + break + + RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) + + if thread_predecessors[dest_index] >= 0: + vec = new vector[long long]() + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + p = dest_index + while p != origin_index: + connector = thread_conn[p] + p = thread_predecessors[p] + vec.push_back(connector) + + for connector in deref(vec): + thread_cost[connector] *= penatly + + route_set.insert(vec) + else: + break + + return route_set + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, 
vector[RouteSet_t *] &route_sets): cdef: diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 4b4417f89..1400cea9e 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -37,54 +37,57 @@ def test_route_choice(self): rc = RouteChoiceSet(self.graph) a, b = 1, 20 - results = rc.run(a, b, max_routes=10, max_depth=0) - self.assertEqual(len(results), 10, "Returned more routes than expected") - self.assertEqual(len(results), len(set(results)), "Returned duplicate routes") - - # With a depth 1 only one path will be found - results = rc.run(a, b, max_routes=0, max_depth=1) - self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") - self.assertListEqual(results, [(58, 52, 29, 24, 12, 8, 5, 1)], "Initial route isn't the shortest A* route") - - # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths - results2 = rc.run(a, b, max_routes=0, max_depth=2) - self.assertEqual( - len(results2), 1 + len(results[0]) - 4, "Depth of 2 didn't yield the expected number of routes" - ) - self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: + with self.subTest(**kwargs): + results = rc.run(a, b, max_routes=10, **kwargs) + self.assertEqual(len(results), 10, "Returned more routes than expected") + self.assertEqual(len(results), len(set(results)), "Returned duplicate routes") + + # With a depth 1 only one path will be found + results = rc.run(a, b, max_routes=0, max_depth=1) + self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") + self.assertListEqual(results, [(1, 5, 8, 12, 24, 29, 52, 58)], "Initial route isn't the shortest A* route") + + # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths + results2 = rc.run(a, b, max_routes=0, max_depth=2, **kwargs) + self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") self.assertListEqual( - rc.run(a, b, max_routes=0, max_depth=2, seed=0), - rc.run(a, b, max_routes=0, max_depth=2, seed=10), + rc.run(a, b, max_routes=0, seed=0, max_depth=2), + rc.run(a, b, max_routes=0, seed=10, max_depth=2), "Seeded and unseeded results differ with unlimited `max_routes` (queue is incorrectly being shuffled)", ) self.assertNotEqual( - rc.run(a, b, max_routes=3, max_depth=2, seed=0), - rc.run(a, b, max_routes=3, max_depth=2, seed=10), + rc.run(a, b, max_routes=3, seed=0, max_depth=2), + rc.run(a, b, max_routes=3, seed=10, max_depth=2), "Seeded and unseeded results don't differ with limited `max_routes` (queue is not being shuffled)", ) def test_route_choice_empty_path(self): - rc = RouteChoiceSet(self.graph) - a = 1 + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: + with self.subTest(**kwargs): + rc = RouteChoiceSet(self.graph) + a = 1 - self.assertFalse(rc.batched([(a, a)], max_routes=0, max_depth=3), "Route set from self to self should be empty") + self.assertFalse(rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs), "Route set from self to self should be empty") def test_route_choice_blocking_centroids(self): - a, b = 1, 20 + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: + with self.subTest(**kwargs): + a, b = 1, 20 - self.graph.set_blocked_centroid_flows(False) - rc = RouteChoiceSet(self.graph) + 
self.graph.set_blocked_centroid_flows(False) + rc = RouteChoiceSet(self.graph) - results = rc.run(a, b, max_routes=2, max_depth=2) - self.assertNotEqual(results, [], "Unblocked centroid flow found no paths") + results = rc.run(a, b, max_routes=2, max_depth=2, **kwargs) + self.assertNotEqual(results, [], "Unblocked centroid flow found no paths") - self.graph.set_blocked_centroid_flows(True) - rc = RouteChoiceSet(self.graph) + self.graph.set_blocked_centroid_flows(True) + rc = RouteChoiceSet(self.graph) - results = rc.run(a, b, max_routes=2, max_depth=2) - self.assertListEqual(results, [], "Blocked centroid flow found a path") + results = rc.run(a, b, max_routes=2, max_depth=2, **kwargs) + self.assertListEqual(results, [], "Blocked centroid flow found a path") def test_route_choice_batched(self): np.random.seed(0) @@ -115,7 +118,6 @@ def test_route_choice_duplicates_batched(self): gb = results.to_pandas().groupby(by="origin id") self.assertEqual(len(gb), 1, "Duplicates not dropped") - def test_route_choice_exceptions(self): rc = RouteChoiceSet(self.graph) args = [ @@ -131,6 +133,10 @@ def test_route_choice_exceptions(self): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) + with self.assertRaises(ValueError): + rc.run(1, 1, max_routes=1, max_depth=1, bfsle=True, penalty=1.5) + rc.run(1, 1, max_routes=1, max_depth=1, bfsle=False, penalty=0.1) + # def test_debug2(self): # import pyarrow as pa # import pyarrow.parquet as pq From 738a657902ddce219b3d2248fdbce86e7a532396 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 14 Feb 2024 17:15:03 +1000 Subject: [PATCH 17/33] Linting --- tests/aequilibrae/paths/test_route_choice.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 1400cea9e..f1a5871ed 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -46,7 +46,9 @@ def test_route_choice(self): # With a depth 1 only one path will be found results = rc.run(a, b, max_routes=0, max_depth=1) self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") - self.assertListEqual(results, [(1, 5, 8, 12, 24, 29, 52, 58)], "Initial route isn't the shortest A* route") + self.assertListEqual( + results, [(1, 5, 8, 12, 24, 29, 52, 58)], "Initial route isn't the shortest A* route" + ) # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths results2 = rc.run(a, b, max_routes=0, max_depth=2, **kwargs) @@ -70,7 +72,10 @@ def test_route_choice_empty_path(self): rc = RouteChoiceSet(self.graph) a = 1 - self.assertFalse(rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs), "Route set from self to self should be empty") + self.assertFalse( + rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs), + "Route set from self to self should be empty", + ) def test_route_choice_blocking_centroids(self): for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: From 7d15ef0903d2c42a34886ed2319850d91c8f17e0 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Thu, 15 Feb 2024 17:13:43 +1000 Subject: [PATCH 18/33] Add parallelised sparse frequency implementation and test Code is unprofiled, I'm not sure if this is the best approach but it works well. It works by stacking all the paths in a route set into a big vector, then sorting it. 
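Roughly, in Python terms (an illustrative sketch of the idea only; the real
implementation does this on C++ vectors of link IDs, in parallel and without
the GIL):

    link_union = sorted(link for route in route_set for link in route)
    keys, counts = [], []
    for link in link_union:
        if keys and keys[-1] == link:
            counts[-1] += 1       # same link again, bump its count
        else:
            keys.append(link)     # first occurrence of this link
            counts.append(1)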
By sorting it, all the links become grouped and we can just count their
occurrences. This has the added benefit that the resulting frequency arrays
are sorted, so we can bisect them later.

Generally this has really simple memory accesses and is easy to read.

Another possible implementation might be to sort each path individually, then
walk and merge them all (not adding duplicates). This is trickier and requires
a lot of bookkeeping to walk n arrays correctly; the upside is lower memory
usage provided we sort in place, otherwise it should be the same.
---
 aequilibrae/paths/route_choice.pxd           |  3 +
 aequilibrae/paths/route_choice.pyx           | 70 +++++++++++++++++++-
 tests/aequilibrae/paths/test_route_choice.py | 41 ++++++++----
 3 files changed, 101 insertions(+), 13 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd
index c5c1dfadc..05326961b 100644
--- a/aequilibrae/paths/route_choice.pxd
+++ b/aequilibrae/paths/route_choice.pxd
@@ -165,6 +165,9 @@ cdef class RouteChoiceSet:
         unsigned int seed
     ) noexcept nogil
 
+    @staticmethod
+    cdef vector[pair[vector[long long] *, vector[long long] *]] *frequency(vector[RouteSet_t *] &route_sets, unsigned int cores) noexcept nogil
+
     @staticmethod
     cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index 6f38522e9..0afb82fed 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -62,6 +62,7 @@ from libcpp.vector cimport vector
 from libcpp.unordered_set cimport unordered_set
 from libcpp.unordered_map cimport unordered_map
 from libcpp.utility cimport pair
+from libcpp.algorithm cimport sort
 from cython.operator cimport dereference as deref, preincrement as inc
 from cython.parallel cimport parallel, prange, threadid
 
 import numpy as np
@@ -166,6 +167,7 @@ cdef class RouteChoiceSet:
         bfsle: bool = True,
         penalty: float = 0.0,
         where: Optional[str] = None,
+        freq_as_well = False,
     ):
         """
         Compute the a route set for a list of OD pairs.
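(When ``freq_as_well`` is passed, the return shape — inferred from the test
added further below — is a pair: the usual table plus, per OD, two parallel
lists of sorted link IDs and their occurrence counts across the route set.)

    table, freqs = rc.batched(nodes, max_routes=20, freq_as_well=True)
    links, counts = freqs[0]  # frequencies for the first OD pair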
@@ -310,13 +312,27 @@ cdef class RouteChoiceSet: table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results))) + + if freq_as_well: + freqs = [] + for freq in deref(RouteChoiceSet.frequency(deref(results), c_cores)): + freqs.append( + ( + list(deref(freq.first)), + list(deref(freq.second)), + ) + ) + # Once we've made the table all results have been copied into some pyarrow structure, we can free our internal structures for result in deref(results): for route in deref(result): del route if where is None: # There was only one batch anyway - return table + if freq_as_well: + return table, freqs + else: + return table checkpoint.write(table) @@ -519,6 +535,58 @@ cdef class RouteChoiceSet: return route_set + + @staticmethod + cdef vector[pair[vector[long long] *, vector[long long] *]] *frequency(vector[RouteSet_t *] &route_sets, unsigned int cores) noexcept nogil: + cdef: + vector[pair[vector[long long] *, vector[long long] *]] *freq_set = new vector[pair[vector[long long] *, vector[long long] *]]() + vector[long long] *keys + vector[long long] *counts + + # Scatch objects + vector[long long] *link_union + size_t length, count + long long link, i + + freq_set.reserve(route_sets.size()) + + with parallel(num_threads=cores): + link_union = new vector[long long]() + for i in prange(route_sets.size()): + route_set = route_sets[i] + + link_union.clear() + + keys = new vector[long long]() + counts = new vector[long long]() + + length = 0 + for route in deref(route_set): + length = length + route.size() + link_union.reserve(length) + + for route in deref(route_set): + link_union.insert(link_union.end(), route.begin(), route.end()) + + sort(link_union.begin(), link_union.end()) + + union_iter = link_union.begin() + while union_iter != link_union.end(): + count = 0 + link = deref(union_iter) + while link == deref(union_iter): + count = count + 1 + inc(union_iter) + + keys.push_back(link) + counts.push_back(count) + + freq_set.emplace(freq_set.cbegin() + i, keys, counts) + + del link_union + + return freq_set + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets): cdef: diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index f1a5871ed..19b126939 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -142,22 +142,22 @@ def test_route_choice_exceptions(self): rc.run(1, 1, max_routes=1, max_depth=1, bfsle=True, penalty=1.5) rc.run(1, 1, max_routes=1, max_depth=1, bfsle=False, penalty=0.1) - # def test_debug2(self): - # import pyarrow as pa - # import pyarrow.parquet as pq - # import matplotlib.pyplot as plt + def test_debug2(self): + import pyarrow as pa + import pyarrow.parquet as pq + import matplotlib.pyplot as plt - # np.random.seed(0) - # rc = RouteChoiceSet(self.graph) - # nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - # max_routes = 20 + max_routes = 20 - # breakpoint() - # # Table of origins - # table = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) + # Table of origins + table = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) + breakpoint() - # assert False + assert False def test_round_trip(self): np.random.seed(1000) @@ -182,6 
+182,23 @@ def test_round_trip(self):
 
         pd.testing.assert_frame_equal(table, new_table)
 
+    def test_frequency_results(self):
+        np.random.seed(0)
+        rc = RouteChoiceSet(self.graph)
+        nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)]
+        table, freqs = rc.batched(nodes, max_routes=20, max_depth=10, freq_as_well=True)
+        table = table.to_pandas()
+
+        gb = table.groupby(by=["origin id", "destination id"])
+        for od, freq in zip(set(nodes), freqs):  # iteration order is changed by set operation
+            df = gb.get_group(od)
+            bincount = np.bincount(np.hstack(df["route set"].values))
+            keys = bincount.nonzero()[0]
+
+            vals = bincount[keys]
+            self.assertListEqual(list(keys), freq[0], "Keys of frequencies differ")
+            self.assertListEqual(list(vals), freq[1], "Values of frequencies differ")
+
 
 def generate_line_strings(project, graph, results):
     """Debug method"""

From 1543549558371cfffeeee386304477e0bcb57fb0 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Fri, 16 Feb 2024 11:47:46 +1000
Subject: [PATCH 19/33] Remove debug test

---
 aequilibrae/paths/route_choice.pyx           | 11 +++++++++--
 tests/aequilibrae/paths/test_route_choice.py | 17 -----------------
 2 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx
index 0afb82fed..d954472df 100644
--- a/aequilibrae/paths/route_choice.pyx
+++ b/aequilibrae/paths/route_choice.pyx
@@ -535,7 +535,10 @@ cdef class RouteChoiceSet:
 
         return route_set
 
-
+    @cython.wraparound(False)
+    @cython.embedsignature(True)
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
     @staticmethod
     cdef vector[pair[vector[long long] *, vector[long long] *]] *frequency(vector[RouteSet_t *] &route_sets, unsigned int cores) noexcept nogil:
         cdef:
@@ -543,7 +546,7 @@ cdef class RouteChoiceSet:
             vector[long long] *keys
             vector[long long] *counts
 
-            # Scatch objects
+            # Scratch objects
             vector[long long] *link_union
             size_t length, count
             long long link, i
@@ -587,6 +590,10 @@ cdef class RouteChoiceSet:
 
         return freq_set
 
+    @cython.wraparound(False)
+    @cython.embedsignature(True)
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
     @staticmethod
     cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets):
         cdef:
diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py
index 19b126939..7d42ee39e 100644
--- a/tests/aequilibrae/paths/test_route_choice.py
+++ b/tests/aequilibrae/paths/test_route_choice.py
@@ -142,23 +142,6 @@ def test_route_choice_exceptions(self):
             rc.run(1, 1, max_routes=1, max_depth=1, bfsle=True, penalty=1.5)
             rc.run(1, 1, max_routes=1, max_depth=1, bfsle=False, penalty=0.1)
 
-    def test_debug2(self):
-        import pyarrow as pa
-        import pyarrow.parquet as pq
-        import matplotlib.pyplot as plt
-
-        np.random.seed(0)
-        rc = RouteChoiceSet(self.graph)
-        nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)]
-
-        max_routes = 20
-
-        # Table of origins
-        table = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1)
-        breakpoint()
-
-        assert False
-
     def test_round_trip(self):
         np.random.seed(1000)
         rc = RouteChoiceSet(self.graph)

From 31aaaadead5883b4e32c3d6b6c075454a5d9dcd9 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Fri, 16 Feb 2024 14:09:48 +1000
Subject: [PATCH 20/33] Add route cost computation and test

---
 aequilibrae/paths/route_choice.pxd           |  3 ++
 aequilibrae/paths/route_choice.pyx           | 43
+++++++++++++++++++- tests/aequilibrae/paths/test_route_choice.py | 13 ++++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 05326961b..e113af579 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -168,6 +168,9 @@ cdef class RouteChoiceSet: @staticmethod cdef vector[pair[vector[long long] *, vector[long long] *]] *frequency(vector[RouteSet_t *] &route_sets, unsigned int cores) noexcept nogil + @staticmethod + cdef vector[vector[double] *] *compute_cost(vector[RouteSet_t *] &route_sets, double[:] cost_view, unsigned int cores) noexcept nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index d954472df..32935c573 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -168,6 +168,7 @@ cdef class RouteChoiceSet: penalty: float = 0.0, where: Optional[str] = None, freq_as_well = False, + cost_as_well = False, ): """ Compute the a route set for a list of OD pairs. @@ -323,13 +324,20 @@ cdef class RouteChoiceSet: ) ) + if cost_as_well: + costs = [] + for cost_vec in deref(RouteChoiceSet.compute_cost(deref(results), self.cost_view, c_cores)): + costs.append(list(deref(cost_vec))) + # Once we've made the table all results have been copied into some pyarrow structure, we can free our internal structures for result in deref(results): for route in deref(result): del route if where is None: # There was only one batch anyway - if freq_as_well: + if cost_as_well: + return table, costs + elif freq_as_well: return table, freqs else: return table @@ -590,6 +598,39 @@ cdef class RouteChoiceSet: return freq_set + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + @staticmethod + cdef vector[vector[double] *] *compute_cost(vector[RouteSet_t *] &route_sets, double[:] cost_view, unsigned int cores) noexcept nogil: + cdef: + vector[vector[double] *] *cost_set = new vector[vector[double] *](route_sets.size()) + vector[double] *cost_vec + + # Scratch objects + vector[long long] *link_union + size_t length, + double cost + long long link, i + + with parallel(num_threads=cores): + for i in prange(route_sets.size()): + route_set = route_sets[i] + cost_vec = new vector[double]() + cost_vec.reserve(route_set.size()) + + for route in deref(route_set): + cost = 0.0 + for link in deref(route): + cost = cost + cost_view[link] + + cost_vec.push_back(cost) + + deref(cost_set)[i] = cost_vec + + return cost_set + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 7d42ee39e..62beab925 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -182,6 +182,19 @@ def test_frequency_results(self): self.assertListEqual(list(keys), freq[0], "Keys of frequencies differreturns") self.assertListEqual(list(vals), freq[1], "Values of frequencies differ") + def test_cost_results(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + table, costs = rc.batched(nodes, max_routes=20, max_depth=10, cost_as_well=True) + table = 
table.to_pandas() + + gb = table.groupby(by=["origin id", "destination id"]) + for od, cost_vec in zip(set(nodes), costs): # iteration order is changed by set operation + df = gb.get_group(od) + for route, cost in zip(df["route set"].values, cost_vec): + np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + def generate_line_strings(project, graph, results): """Debug method""" From 71257eb569ce5b64e2ebfd5818dbe00d68786c5b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 16 Feb 2024 15:23:51 +1000 Subject: [PATCH 21/33] Add psl gamma computation, not sure what to test against --- aequilibrae/paths/route_choice.pxd | 9 +++ aequilibrae/paths/route_choice.pyx | 95 ++++++++++++++++++++++++++---- 2 files changed, 91 insertions(+), 13 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index e113af579..f6b4ab084 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -171,6 +171,15 @@ cdef class RouteChoiceSet: @staticmethod cdef vector[vector[double] *] *compute_cost(vector[RouteSet_t *] &route_sets, double[:] cost_view, unsigned int cores) noexcept nogil + @staticmethod + cdef vector[vector[double] *] *compute_psl( + vector[RouteSet_t *] &route_sets, + vector[pair[vector[long long] *, vector[long long] *]] &freq_sets, + vector[vector[double] *] &total_costs, + double[:] cost_view, + unsigned int cores + ) noexcept nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 32935c573..e8a4c4837 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -62,7 +62,7 @@ from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair -from libcpp.algorithm cimport sort +from libcpp.algorithm cimport sort, lower_bound from cython.operator cimport dereference as deref, preincrement as inc from cython.parallel cimport parallel, prange, threadid @@ -169,6 +169,7 @@ cdef class RouteChoiceSet: where: Optional[str] = None, freq_as_well = False, cost_as_well = False, + psl_as_well = False, ): """ Compute the a route set for a list of OD pairs. 
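(For reference: the gamma quantity implemented below appears to be the
path-size factor of the standard path-size logit model; the correspondence is
assumed from the "Notation changes" note inside ``compute_psl``.)

    \gamma_i = \sum_{a \in A_i} \frac{t_a}{c_i} \cdot \frac{1}{\sum_{k \in R} \delta_{a,k}}

Here t_a is the cost of link a, c_i the total cost of route i, A_i the set of
links making up route i, and \sum_{k \in R} \delta_{a,k} counts how many routes
in the route set R use link a.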
@@ -313,21 +314,33 @@ cdef class RouteChoiceSet: table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results))) + returns = [] + freqs = RouteChoiceSet.frequency(deref(results), c_cores) if freq_as_well: - freqs = [] - for freq in deref(RouteChoiceSet.frequency(deref(results), c_cores)): - freqs.append( + freqs_list = [] + for freq in deref(freqs): + freqs_list.append( ( list(deref(freq.first)), list(deref(freq.second)), ) ) + returns.append(freqs_list) + costs = RouteChoiceSet.compute_cost(deref(results), self.cost_view, c_cores) if cost_as_well: - costs = [] - for cost_vec in deref(RouteChoiceSet.compute_cost(deref(results), self.cost_view, c_cores)): - costs.append(list(deref(cost_vec))) + costs_list = [] + for cost_vec in deref(costs): + costs_list.append(list(deref(cost_vec))) + returns.append(costs_list) + + psls = RouteChoiceSet.compute_psl(deref(results), deref(freqs), deref(costs), self.cost_view, c_cores) + if psl_as_well: + psls_list = [] + for psl_vec in deref(psls): + psls_list.append(list(deref(psl_vec))) + returns.append(psls_list) # Once we've made the table all results have been copied into some pyarrow structure, we can free our internal structures for result in deref(results): @@ -335,13 +348,12 @@ cdef class RouteChoiceSet: del route if where is None: # There was only one batch anyway - if cost_as_well: - return table, costs - elif freq_as_well: - return table, freqs + if returns: + return table, *returns else: return table + checkpoint.write(table) del table @@ -609,8 +621,6 @@ cdef class RouteChoiceSet: vector[double] *cost_vec # Scratch objects - vector[long long] *link_union - size_t length, double cost long long link, i @@ -631,6 +641,65 @@ cdef class RouteChoiceSet: return cost_set + @staticmethod + cdef vector[vector[double] *] *compute_psl( + vector[RouteSet_t *] &route_sets, + vector[pair[vector[long long] *, vector[long long] *]] &freq_sets, + vector[vector[double] *] &total_costs, + double[:] cost_view, + unsigned int cores + ) noexcept nogil: + """ + Notation changes: + i: j + a: link + t_a: cost_view + c_i: total_costs + A_i: route + sum_{k in R}: delta_{a,k}: freq_set + """ + cdef: + vector[vector[double] *] *psl_set = new vector[vector[double] *](route_sets.size()) + vector[double] *psl_vec + + # Scratch objects + pair[vector[long long] *, vector[long long] *] freq_set + vector[long long].const_iterator link_iter + vector[double] *total_cost + double prob + long long link, j + size_t i + + with parallel(num_threads=cores): + for i in prange(route_sets.size()): + route_set = route_sets[i] + freq_set = freq_sets[i] + total_cost = total_costs[i] + psl_vec = new vector[double]() + psl_vec.reserve(route_set.size()) + + j = 0 + for route in deref(route_set): + prob = 0.0 + for link in deref(route): + # We know the frequency table is ordered and contains every link in the union of the routes. + # We want to find the index of the link, and use that to look up it's frequency + link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) + + if link != deref(link_iter): + fprintf(stderr, "Link %d not found in link set? 
not possible??\n", link) + abort() + + prob = prob + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + + psl_vec.push_back(prob / deref(total_cost)[j]) + + j = j + 1 + + deref(psl_set)[i] = psl_vec + + return psl_set + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) From 7688d758da5787bfc92983963dc21e759b363133 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 16 Feb 2024 17:35:28 +1000 Subject: [PATCH 22/33] Add scratch probability implementation --- aequilibrae/paths/route_choice.pxd | 9 +++ aequilibrae/paths/route_choice.pyx | 72 ++++++++++++++++++-- tests/aequilibrae/paths/test_route_choice.py | 28 ++++++++ 3 files changed, 104 insertions(+), 5 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index f6b4ab084..3a05237d6 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -180,6 +180,15 @@ cdef class RouteChoiceSet: unsigned int cores ) noexcept nogil + @staticmethod + cdef vector[vector[double] *] *compute_prob( + vector[vector[double] *] &total_costs, + vector[vector[double] *] &gammas, + double beta, + double theta, + unsigned int cores + ) noexcept nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index e8a4c4837..ea2530dbc 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -54,7 +54,7 @@ routes aren't required small-ish things like the memcpy and banned link set copy from aequilibrae import Graph -from libc.math cimport INFINITY +from libc.math cimport INFINITY, pow, exp from libc.string cimport memcpy from libc.limits cimport UINT_MAX from libc.stdlib cimport abort @@ -170,6 +170,9 @@ cdef class RouteChoiceSet: freq_as_well = False, cost_as_well = False, psl_as_well = False, + prob_as_well = False, + beta: float = 1.0, + theta: float = 1.0, ): """ Compute the a route set for a list of OD pairs. @@ -342,6 +345,13 @@ cdef class RouteChoiceSet: psls_list.append(list(deref(psl_vec))) returns.append(psls_list) + probs = RouteChoiceSet.compute_prob(deref(costs), deref(psls), beta, theta, c_cores) + if prob_as_well: + probs_list = [] + for prob_vec in deref(probs): + probs_list.append(list(deref(prob_vec))) + returns.append(probs_list) + # Once we've made the table all results have been copied into some pyarrow structure, we can free our internal structures for result in deref(results): for route in deref(result): @@ -666,7 +676,7 @@ cdef class RouteChoiceSet: pair[vector[long long] *, vector[long long] *] freq_set vector[long long].const_iterator link_iter vector[double] *total_cost - double prob + double gamma long long link, j size_t i @@ -680,7 +690,7 @@ cdef class RouteChoiceSet: j = 0 for route in deref(route_set): - prob = 0.0 + gamma = 0.0 for link in deref(route): # We know the frequency table is ordered and contains every link in the union of the routes. # We want to find the index of the link, and use that to look up it's frequency @@ -690,9 +700,9 @@ cdef class RouteChoiceSet: fprintf(stderr, "Link %d not found in link set? 
not possible??\n", link) abort() - prob = prob + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + gamma = gamma + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] - psl_vec.push_back(prob / deref(total_cost)[j]) + psl_vec.push_back(gamma / deref(total_cost)[j]) j = j + 1 @@ -700,6 +710,58 @@ cdef class RouteChoiceSet: return psl_set + @staticmethod + cdef vector[vector[double] *] *compute_prob( + vector[vector[double] *] &total_costs, + vector[vector[double] *] &gammas, + double beta, + double theta, + unsigned int cores + ) noexcept nogil: + cdef: + vector[vector[double] *] *prob_set = new vector[vector[double] *](total_costs.size()) + + # Scratch objects + vector[double] *total_cost + vector[double] *gamma_vec + vector[double] *prob_vec + double inv_prob, sum_gamma_i_to_beta, sum_exp_theta_c_i + long long route_set_idx + size_t i, j + + with parallel(num_threads=cores): + for route_set_idx in prange(total_costs.size()): + total_cost = total_costs[route_set_idx] + gamma_vec = gammas[route_set_idx] + prob_vec = new vector[double]() + prob_vec.reserve(total_cost.size()) + + + for i in range(total_cost.size()): + inv_prob = 0.0 + for j in range(total_cost.size()): + inv_prob = inv_prob + pow(deref(gamma_vec)[j] / deref(gamma_vec)[i], beta) * exp(-theta * (deref(total_cost)[j] - deref(total_cost)[i])) + + prob_vec.push_back(1.0 / inv_prob) + + # sum_gamma_i_to_beta = 0.0 + # sum_exp_theta_c_i = 0.0 + # for i in range(total_cost.size()): + # sum_gamma_i_to_beta = sum_gamma_i_to_beta + pow(deref(gamma_vec)[i], beta) + # sum_exp_theta_c_i = sum_exp_theta_c_i + exp(beta * deref(total_cost)[i]) + + # for i in range(total_cost.size()): + # inv_prob = 0.0 + # for j in range(total_cost.size()): + # inv_prob = inv_prob + exp(-beta * deref(total_cost)[j]) * pow(deref(gamma_vec)[j], beta) + # inv_prob = inv_prob * sum_exp_theta_c_i / sum_gamma_i_to_beta + + # prob_vec.push_back(1.0 / inv_prob) + + deref(prob_set)[route_set_idx] = prob_vec + + return prob_set + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 62beab925..123b1003b 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -195,6 +195,34 @@ def test_cost_results(self): for route, cost in zip(df["route set"].values, cost_vec): np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + # def test_psl_results(self): + # np.random.seed(0) + # rc = RouteChoiceSet(self.graph) + # nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + # table, psls = rc.batched(nodes, max_routes=20, max_depth=10, psl_as_well=True) + # table = table.to_pandas() + + # gb = table.groupby(by=["origin id", "destination id"]) + # breakpoint() + # for od, cost_vec in zip(set(nodes), costs): # iteration order is changed by set operation + # df = gb.get_group(od) + # for route, cost in zip(df["route set"].values, cost_vec): + # np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + + def test_prob_results(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + table, costs, psls, probs = rc.batched(nodes, max_routes=20, max_depth=10, prob_as_well=True, cost_as_well=True, 
psl_as_well=True) + table = table.to_pandas() + + breakpoint() + for prob_vec in probs: + self.assertAlmostEqual(1.0, sum(prob_vec), msg="Probability not close to 1.0") + # for od, cost_vec in zip(set(nodes), costs): # iteration order is changed by set operation + # df = gb.get_group(od) + # for route, cost in zip(df["route set"].values, cost_vec): + # np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") def generate_line_strings(project, graph, results): """Debug method""" From b0627e48ff1132f66ddb043dbcb6c85d1c93edc8 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Mon, 19 Feb 2024 13:40:00 +1000 Subject: [PATCH 23/33] Fix probability calculations --- aequilibrae/paths/route_choice.pyx | 26 ++++++++------------ tests/aequilibrae/paths/test_route_choice.py | 1 - 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index ea2530dbc..272119407 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -651,6 +651,10 @@ cdef class RouteChoiceSet: return cost_set + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) @staticmethod cdef vector[vector[double] *] *compute_psl( vector[RouteSet_t *] &route_sets, @@ -710,6 +714,10 @@ cdef class RouteChoiceSet: return psl_set + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) @staticmethod cdef vector[vector[double] *] *compute_prob( vector[vector[double] *] &total_costs, @@ -725,7 +733,7 @@ cdef class RouteChoiceSet: vector[double] *total_cost vector[double] *gamma_vec vector[double] *prob_vec - double inv_prob, sum_gamma_i_to_beta, sum_exp_theta_c_i + double inv_prob long long route_set_idx size_t i, j @@ -736,7 +744,7 @@ cdef class RouteChoiceSet: prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) - + # Beware when refactoring the below, the scale of the costs may cause floating point errors. 
Large costs will lead to NaN results
             for i in range(total_cost.size()):
                 inv_prob = 0.0
                 for j in range(total_cost.size()):
@@ -744,20 +752,6 @@ cdef class RouteChoiceSet:
 
                 prob_vec.push_back(1.0 / inv_prob)
 
-            # sum_gamma_i_to_beta = 0.0
-            # sum_exp_theta_c_i = 0.0
-            # for i in range(total_cost.size()):
-            #     sum_gamma_i_to_beta = sum_gamma_i_to_beta + pow(deref(gamma_vec)[i], beta)
-            #     sum_exp_theta_c_i = sum_exp_theta_c_i + exp(beta * deref(total_cost)[i])
-
-            # for i in range(total_cost.size()):
-            #     inv_prob = 0.0
-            #     for j in range(total_cost.size()):
-            #         inv_prob = inv_prob + exp(-beta * deref(total_cost)[j]) * pow(deref(gamma_vec)[j], beta)
-            #     inv_prob = inv_prob * sum_exp_theta_c_i / sum_gamma_i_to_beta
-
-            #     prob_vec.push_back(1.0 / inv_prob)
-
             deref(prob_set)[route_set_idx] = prob_vec
 
         return prob_set
diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py
index 123b1003b..9d87873ca 100644
--- a/tests/aequilibrae/paths/test_route_choice.py
+++ b/tests/aequilibrae/paths/test_route_choice.py
@@ -216,7 +216,6 @@ def test_prob_results(self):
         table, costs, psls, probs = rc.batched(nodes, max_routes=20, max_depth=10, prob_as_well=True, cost_as_well=True, psl_as_well=True)
         table = table.to_pandas()
 
-        breakpoint()
         for prob_vec in probs:
             self.assertAlmostEqual(1.0, sum(prob_vec), msg="Probability not close to 1.0")
         # for od, cost_vec in zip(set(nodes), costs):  # iteration order is changed by set operation

From 8c4611504297274c00d2d32436618bddeacba246 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Tue, 20 Feb 2024 15:20:36 +1000
Subject: [PATCH 24/33] The great refactor, batch probability computations to dump to disk

Moves the computation of the path sized logit inside the main multithreaded
loop; this lets us batch them as well and dump them to disk along with the
rest of the tables.

Enable multithreading by default.

Catch a handful of memory leaks.
---
 aequilibrae/paths/route_choice.pxd           |  50 ++-
 aequilibrae/paths/route_choice.pyx           | 399 ++++++++++---------
 tests/aequilibrae/paths/test_route_choice.py |  55 +--
 3 files changed, 258 insertions(+), 246 deletions(-)

diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd
index 3a05237d6..bf4671d47 100644
--- a/aequilibrae/paths/route_choice.pxd
+++ b/aequilibrae/paths/route_choice.pxd
@@ -14,7 +14,7 @@ from libcpp.memory cimport shared_ptr
 
 # std::linear_congruential_engine is not available in the Cython libcpp.random shim. We'll import it ourselves
 # from libcpp.random cimport minstd_rand
-from libc.stdint cimport uint_fast32_t, uint_fast64_t
+from libc.stdint cimport *
 
 cdef extern from "<random>" namespace "std" nogil:
     cdef cppclass random_device:
@@ -109,6 +109,21 @@ ctypedef unordered_set[vector[long long] *, OrderedVectorPointerHasher, PointerD
 ctypedef unordered_set[unordered_set[long long] *, UnorderedSetPointerHasher, PointerDereferenceEqualTo[unordered_set[long long] *]] LinkSet_t
 ctypedef vector[pair[unordered_set[long long] *, vector[long long] *]] RouteMap_t
 
+# Pyarrow's Cython API does not provide all the functions available in the C++ API, some of them are really useful.
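+# (AppendValues is the main one missed: the C++ builders can bulk-append a whole
+# std::vector in one call, where the Cython-exposed Append adds one value at a time.)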
+# Here we redeclare the classes with the functions we want, these are available in the current namespace, *not* libarrow +cdef extern from "arrow/builder.h" namespace "arrow" nogil: + + cdef cppclass CUInt32Builder" arrow::UInt32Builder"(libpa.CArrayBuilder): + CUInt32Builder(libpa.CMemoryPool* pool) + libpa.CStatus Append(const uint32_t value) + libpa.CStatus AppendValues(const vector[uint32_t] &values) + libpa.CStatus AppendValues(vector[uint32_t].const_reverse_iterator values_begin, vector[uint32_t].const_reverse_iterator values_end) + + cdef cppclass CDoubleBuilder" arrow::DoubleBuilder"(libpa.CArrayBuilder): + CDoubleBuilder(libpa.CMemoryPool* pool) + libpa.CStatus Append(const double value) + libpa.CStatus AppendValues(const vector[double] &values) + cdef class RouteChoiceSet: cdef: @@ -166,32 +181,35 @@ cdef class RouteChoiceSet: ) noexcept nogil @staticmethod - cdef vector[pair[vector[long long] *, vector[long long] *]] *frequency(vector[RouteSet_t *] &route_sets, unsigned int cores) noexcept nogil + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil @staticmethod - cdef vector[vector[double] *] *compute_cost(vector[RouteSet_t *] &route_sets, double[:] cost_view, unsigned int cores) noexcept nogil + cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil @staticmethod - cdef vector[vector[double] *] *compute_psl( - vector[RouteSet_t *] &route_sets, - vector[pair[vector[long long] *, vector[long long] *]] &freq_sets, - vector[vector[double] *] &total_costs, - double[:] cost_view, - unsigned int cores + cdef vector[double] *compute_gamma( + RouteSet_t *route_set, + pair[vector[long long] *, vector[long long] *] &freq_set, + vector[double] &total_cost, + double[:] cost_view ) noexcept nogil @staticmethod - cdef vector[vector[double] *] *compute_prob( - vector[vector[double] *] &total_costs, - vector[vector[double] *] &gammas, + cdef vector[double] *compute_prob( + vector[double] &total_cost, + vector[double] &gamma_vec, double beta, - double theta, - unsigned int cores + double theta ) noexcept nogil @staticmethod - cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets) - + cdef shared_ptr[libpa.CTable] make_table_from_results( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &route_sets, + vector[vector[double] *] *cost_set, + vector[vector[double] *] *gamma_set, + vector[vector[double] *] *prob_set + ) cdef class Checkpoint: cdef: diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 272119407..b22dd0c08 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -58,6 +58,7 @@ from libc.math cimport INFINITY, pow, exp from libc.string cimport memcpy from libc.limits cimport UINT_MAX from libc.stdlib cimport abort +from libcpp cimport nullptr from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map @@ -65,6 +66,7 @@ from libcpp.utility cimport pair from libcpp.algorithm cimport sort, lower_bound from cython.operator cimport dereference as deref, preincrement as inc from cython.parallel cimport parallel, prange, threadid +cimport openmp import numpy as np import pyarrow as pa @@ -101,6 +103,15 @@ cdef class RouteChoiceSet: pa.field("route set", route_set_dtype, nullable=False), ]) + psl_schema = pa.schema([ + pa.field("origin id", 
pa.uint32(), nullable=False), + pa.field("destination id", pa.uint32(), nullable=False), + pa.field("route set", route_set_dtype, nullable=False), + pa.field("cost", pa.float64(), nullable=False), + pa.field("gamma", pa.float64(), nullable=False), + pa.field("probability", pa.float64(), nullable=False), + ]) + def __cinit__(self): """C level init. For C memory allocation and initialisation. Called exactly once per object.""" pass @@ -162,15 +173,12 @@ cdef class RouteChoiceSet: max_routes: int = 0, max_depth: int = 0, seed: int = 0, - cores: int = 1, + cores: int = 0, a_star: bool = True, bfsle: bool = True, penalty: float = 0.0, where: Optional[str] = None, - freq_as_well = False, - cost_as_well = False, - psl_as_well = False, - prob_as_well = False, + path_size_logit: bool = False, beta: float = 1.0, theta: float = 1.0, ): @@ -187,7 +195,7 @@ cdef class RouteChoiceSet: **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. Must be non-negative. Default of ``0`` for unlimited. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. - **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``1``. + **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``0`` for all available. **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. @@ -202,15 +210,18 @@ cdef class RouteChoiceSet: if max_routes == 0 and max_depth == 0: raise ValueError("Either `max_routes` or `max_depth` must be > 0") - if max_routes < 0 or max_depth < 0 or cores < 0: + if max_routes < 0 or max_depth < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") if penalty != 0.0 and bfsle: - raise ValueError("Link penalisation (`penatly` > 1.0) and `bfsle` cannot be enabled at once") + raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") if not bfsle and penalty <= 1.0: raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") + if path_size_logit and (beta < 0 or theta <= 0): + raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") + for o, d in ods: if self.nodes_to_indices_view[o] == -1: raise ValueError(f"Origin {o} is not present within the compact graph") @@ -222,28 +233,29 @@ cdef class RouteChoiceSet: unsigned int c_max_routes = max_routes unsigned int c_max_depth = max_depth unsigned int c_seed = seed - unsigned int c_cores = cores + unsigned int c_cores = cores if cores > 0 else openmp.omp_get_num_threads() vector[pair[long long, long long]] c_ods # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. 
Python can handle this memory
-        double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float)
-        long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64)
-        long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64)
-        long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (cores, self.b_nodes_view.shape[0])).copy()
+        double [:, :] cost_matrix = np.empty((c_cores, self.cost_view.shape[0]), dtype=float)
+        long long [:, :] predecessors_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64)
+        long long [:, :] conn_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64)
+        long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (c_cores, self.b_nodes_view.shape[0])).copy()
 
         # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the
         # interface.
         long long [:, :] _reached_first_matrix
 
         vector[RouteSet_t *] *results
+        size_t max_results_len, batch_len, j
 
         # self.a_star = a_star
 
         if self.a_star:
-            _reached_first_matrix = np.zeros((cores, 1), dtype=np.int64)  # Dummy array to allow slicing
+            _reached_first_matrix = np.zeros((c_cores, 1), dtype=np.int64)  # Dummy array to allow slicing
         else:
-            _reached_first_matrix = np.zeros((cores, self.num_nodes + 1), dtype=np.int64)
+            _reached_first_matrix = np.zeros((c_cores, self.num_nodes + 1), dtype=np.int64)
 
         set_ods = set(ods)
         if len(set_ods) != len(ods):
@@ -252,16 +264,47 @@ cdef class RouteChoiceSet:
         if where is not None:
             checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"])
             batches = list(Checkpoint.batches(list(set_ods)))
-            results = new vector[RouteSet_t *](max(len(batch) for batch in batches))
+            max_results_len = max(len(batch) for batch in batches)
         else:
             batches = [list(set_ods)]
-            results = new vector[RouteSet_t *](len(set_ods))
+            max_results_len = len(set_ods)
+
+        results = new vector[RouteSet_t *](max_results_len)
+
+        cdef:
+            RouteSet_t *route_set
+            pair[vector[long long] *, vector[long long] *] freq_pair
+            vector[long long] *link_union = nullptr
+            vector[vector[double] *] *cost_set = nullptr
+            vector[vector[double] *] *gamma_set = nullptr
+            vector[vector[double] *] *prob_set = nullptr
+
+        if path_size_logit:
+            cost_set = new vector[vector[double] *](max_results_len)
+            gamma_set = new vector[vector[double] *](max_results_len)
+            prob_set = new vector[vector[double] *](max_results_len)
 
         for batch in batches:
-            results.resize(len(batch))  # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed
             c_ods = batch  # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer
+            batch_len = c_ods.size()
+            results.resize(batch_len)  # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed
+
+            if path_size_logit:
+                cost_set.clear()
+                gamma_set.clear()
+                prob_set.clear()
+
+                cost_set.resize(batch_len)
+                gamma_set.resize(batch_len)
+                prob_set.resize(batch_len)
+
             with nogil, parallel(num_threads=c_cores):
-                for i in prange(c_ods.size()):
+                # The link union needs to be allocated per thread as scratch space; as it's of unknown length we can't allocate a matrix of them.
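+                # (In Cython's parallel blocks, variables assigned inside the block are
+                # thread-private, so the `new` below gives every thread its own vector.)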
+ # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + if path_size_logit: + link_union = new vector[long long]() + + for i in prange(batch_len): origin_index = self.nodes_to_indices_view[c_ods[i].first] dest_index = self.nodes_to_indices_view[c_ods[i].second] @@ -276,7 +319,7 @@ cdef class RouteChoiceSet: ) if bfsle: - deref(results)[i] = RouteChoiceSet.bfsle( + route_set = RouteChoiceSet.bfsle( self, origin_index, dest_index, @@ -290,7 +333,7 @@ cdef class RouteChoiceSet: c_seed, ) else: - deref(results)[i] = RouteChoiceSet.link_penalisation( + route_set = RouteChoiceSet.link_penalisation( self, origin_index, dest_index, @@ -305,6 +348,17 @@ cdef class RouteChoiceSet: c_seed, ) + if path_size_logit: + link_union.clear() + freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union)) + deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) + deref(gamma_set)[i] = RouteChoiceSet.compute_gamma(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) + deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(gamma_set)[i]), beta, theta) + del freq_pair.first + del freq_pair.second + + deref(results)[i] = route_set + if self.block_flows_through_centroids: blocking_centroid_flows( 1, # Always unblocking @@ -315,60 +369,39 @@ cdef class RouteChoiceSet: self.b_nodes_view, ) - table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results))) - - returns = [] + if path_size_logit: + del link_union - freqs = RouteChoiceSet.frequency(deref(results), c_cores) - if freq_as_well: - freqs_list = [] - for freq in deref(freqs): - freqs_list.append( - ( - list(deref(freq.first)), - list(deref(freq.second)), - ) - ) - returns.append(freqs_list) - - costs = RouteChoiceSet.compute_cost(deref(results), self.cost_view, c_cores) - if cost_as_well: - costs_list = [] - for cost_vec in deref(costs): - costs_list.append(list(deref(cost_vec))) - returns.append(costs_list) - - psls = RouteChoiceSet.compute_psl(deref(results), deref(freqs), deref(costs), self.cost_view, c_cores) - if psl_as_well: - psls_list = [] - for psl_vec in deref(psls): - psls_list.append(list(deref(psl_vec))) - returns.append(psls_list) - - probs = RouteChoiceSet.compute_prob(deref(costs), deref(psls), beta, theta, c_cores) - if prob_as_well: - probs_list = [] - for prob_vec in deref(probs): - probs_list.append(list(deref(prob_vec))) - returns.append(probs_list) - - # Once we've made the table all results have been copied into some pyarrow structure, we can free our internal structures - for result in deref(results): - for route in deref(result): - del route + table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) - if where is None: # There was only one batch anyway - if returns: - return table, *returns - else: - return table + # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures + if path_size_logit: + for j in range(batch_len): + del deref(cost_set)[j] + del deref(gamma_set)[j] + del deref(prob_set)[j] + for j in range(batch_len): + for route in deref(deref(results)[j]): + del route - checkpoint.write(table) + if where is not None: + checkpoint.write(table) + del table + else: + break # There was only one batch anyway - del table + # We're done with everything now, we can free the outer internal structures + if path_size_logit: + del 
cost_set + del gamma_set + del prob_set + del results - return + if where is None: + return table + else: + return @cython.initializedcheck(False) cdef void path_find( @@ -570,98 +603,78 @@ cdef class RouteChoiceSet: @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef vector[pair[vector[long long] *, vector[long long] *]] *frequency(vector[RouteSet_t *] &route_sets, unsigned int cores) noexcept nogil: + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil: cdef: - vector[pair[vector[long long] *, vector[long long] *]] *freq_set = new vector[pair[vector[long long] *, vector[long long] *]]() vector[long long] *keys vector[long long] *counts # Scratch objects - vector[long long] *link_union size_t length, count long long link, i - freq_set.reserve(route_sets.size()) - - with parallel(num_threads=cores): - link_union = new vector[long long]() - for i in prange(route_sets.size()): - route_set = route_sets[i] + link_union.clear() - link_union.clear() + keys = new vector[long long]() + counts = new vector[long long]() - keys = new vector[long long]() - counts = new vector[long long]() + length = 0 + for route in deref(route_set): + length = length + route.size() + link_union.reserve(length) - length = 0 - for route in deref(route_set): - length = length + route.size() - link_union.reserve(length) + for route in deref(route_set): + link_union.insert(link_union.end(), route.begin(), route.end()) - for route in deref(route_set): - link_union.insert(link_union.end(), route.begin(), route.end()) + sort(link_union.begin(), link_union.end()) - sort(link_union.begin(), link_union.end()) + union_iter = link_union.begin() + while union_iter != link_union.end(): + count = 0 + link = deref(union_iter) + while link == deref(union_iter): + count = count + 1 + inc(union_iter) - union_iter = link_union.begin() - while union_iter != link_union.end(): - count = 0 - link = deref(union_iter) - while link == deref(union_iter): - count = count + 1 - inc(union_iter) + keys.push_back(link) + counts.push_back(count) - keys.push_back(link) - counts.push_back(count) - - freq_set.emplace(freq_set.cbegin() + i, keys, counts) - - del link_union - - return freq_set + return make_pair(keys, counts) @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef vector[vector[double] *] *compute_cost(vector[RouteSet_t *] &route_sets, double[:] cost_view, unsigned int cores) noexcept nogil: + cdef vector[double] *compute_cost(RouteSet_t *route_set, double[:] cost_view) noexcept nogil: cdef: - vector[vector[double] *] *cost_set = new vector[vector[double] *](route_sets.size()) vector[double] *cost_vec # Scratch objects double cost long long link, i - with parallel(num_threads=cores): - for i in prange(route_sets.size()): - route_set = route_sets[i] - cost_vec = new vector[double]() - cost_vec.reserve(route_set.size()) + cost_vec = new vector[double]() + cost_vec.reserve(route_set.size()) - for route in deref(route_set): - cost = 0.0 - for link in deref(route): - cost = cost + cost_view[link] + for route in deref(route_set): + cost = 0.0 + for link in deref(route): + cost = cost + cost_view[link] - cost_vec.push_back(cost) + cost_vec.push_back(cost) - deref(cost_set)[i] = cost_vec - - return cost_set + return cost_vec @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef 
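
`compute_frequency` above builds the sorted union of every link in a route set and counts runs of equal ids; an equivalent self-contained sketch of the keys/counts pair it returns:

```python
import numpy as np

def link_frequencies(routes):
    # Concatenate every route, sort, and count runs of equal link ids --
    # the same keys/counts pair compute_frequency produces.
    union = np.sort(np.concatenate([np.asarray(r, dtype=np.int64) for r in routes]))
    return np.unique(union, return_counts=True)

keys, counts = link_frequencies([[3, 1, 2], [2, 5]])
assert keys.tolist() == [1, 2, 3, 5] and counts.tolist() == [1, 2, 1, 1]
```
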
vector[vector[double] *] *compute_psl( - vector[RouteSet_t *] &route_sets, - vector[pair[vector[long long] *, vector[long long] *]] &freq_sets, - vector[vector[double] *] &total_costs, - double[:] cost_view, - unsigned int cores + cdef vector[double] *compute_gamma( + RouteSet_t *route_set, + pair[vector[long long] *, vector[long long] *] &freq_set, + vector[double] &total_cost, + double[:] cost_view ) noexcept nogil: """ Notation changes: @@ -673,100 +686,88 @@ cdef class RouteChoiceSet: sum_{k in R}: delta_{a,k}: freq_set """ cdef: - vector[vector[double] *] *psl_set = new vector[vector[double] *](route_sets.size()) - vector[double] *psl_vec + vector[double] *gamma_vec # Scratch objects - pair[vector[long long] *, vector[long long] *] freq_set vector[long long].const_iterator link_iter - vector[double] *total_cost double gamma long long link, j size_t i - with parallel(num_threads=cores): - for i in prange(route_sets.size()): - route_set = route_sets[i] - freq_set = freq_sets[i] - total_cost = total_costs[i] - psl_vec = new vector[double]() - psl_vec.reserve(route_set.size()) + gamma_vec = new vector[double]() + gamma_vec.reserve(route_set.size()) - j = 0 - for route in deref(route_set): - gamma = 0.0 - for link in deref(route): - # We know the frequency table is ordered and contains every link in the union of the routes. - # We want to find the index of the link, and use that to look up it's frequency - link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) + j = 0 + for route in deref(route_set): + gamma = 0.0 + for link in deref(route): + # We know the frequency table is ordered and contains every link in the union of the routes. + # We want to find the index of the link, and use that to look up it's frequency + link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) - if link != deref(link_iter): - fprintf(stderr, "Link %d not found in link set? not possible??\n", link) - abort() + gamma = gamma + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] - gamma = gamma + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + gamma_vec.push_back(gamma / total_cost[j]) - psl_vec.push_back(gamma / deref(total_cost)[j]) + j = j + 1 - j = j + 1 - - deref(psl_set)[i] = psl_vec - - return psl_set + return gamma_vec @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef vector[vector[double] *] *compute_prob( - vector[vector[double] *] &total_costs, - vector[vector[double] *] &gammas, + cdef vector[double] *compute_prob( + vector[double] &total_cost, + vector[double] &gamma_vec, double beta, - double theta, - unsigned int cores + double theta ) noexcept nogil: cdef: - vector[vector[double] *] *prob_set = new vector[vector[double] *](total_costs.size()) - # Scratch objects - vector[double] *total_cost - vector[double] *gamma_vec vector[double] *prob_vec double inv_prob long long route_set_idx size_t i, j - with parallel(num_threads=cores): - for route_set_idx in prange(total_costs.size()): - total_cost = total_costs[route_set_idx] - gamma_vec = gammas[route_set_idx] - prob_vec = new vector[double]() - prob_vec.reserve(total_cost.size()) - - # Beware when refactoring the below, the scale of the costs may cause floating point errors. 
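
In the docstring's notation, `compute_gamma` evaluates, for each route i with total cost C_i, gamma_i = (1/C_i) * sum over the route's links a of c_a / f_a, where f_a counts how many routes in the set use link a. A pure-Python rendering:

```python
from collections import Counter

def path_size_gammas(routes, link_cost):
    # f_a: number of routes in the set using link a
    freq = Counter(link for route in routes for link in route)
    gammas = []
    for route in routes:
        total = sum(link_cost[a] for a in route)  # C_i
        gammas.append(sum(link_cost[a] / freq[a] for a in route) / total)
    return gammas

# Two routes sharing link 1: the shared link contributes half its cost each,
# so gamma = (4/2 + 6/1) / 10 = 0.8 for both routes.
assert path_size_gammas([[1, 2], [1, 3]], {1: 4.0, 2: 6.0, 3: 6.0}) == [0.8, 0.8]
```
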
Large costs will lead to NaN results - for i in range(total_cost.size()): - inv_prob = 0.0 - for j in range(total_cost.size()): - inv_prob = inv_prob + pow(deref(gamma_vec)[j] / deref(gamma_vec)[i], beta) * exp(-theta * (deref(total_cost)[j] - deref(total_cost)[i])) + prob_vec = new vector[double]() + prob_vec.reserve(total_cost.size()) - prob_vec.push_back(1.0 / inv_prob) + # Beware when refactoring the below, the scale of the costs may cause floating point errors. Large costs will lead to NaN results + for i in range(total_cost.size()): + inv_prob = 0.0 + for j in range(total_cost.size()): + inv_prob = inv_prob + pow(gamma_vec[j] / gamma_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) - deref(prob_set)[route_set_idx] = prob_vec + prob_vec.push_back(1.0 / inv_prob) - return prob_set + return prob_vec @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef shared_ptr[libpa.CTable] make_table_from_results(vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets): + cdef shared_ptr[libpa.CTable] make_table_from_results( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &route_sets, + vector[vector[double] *] *cost_set, + vector[vector[double] *] *gamma_set, + vector[vector[double] *] *prob_set + ): cdef: shared_ptr[libpa.CArray] paths shared_ptr[libpa.CArray] offsets + libpa.CMemoryPool *pool = libpa.c_get_memory_pool() - libpa.CUInt32Builder *path_builder = new libpa.CUInt32Builder(pool) + + # Custom imports, these are declared in route_choice.pxd *not* libarrow. + CUInt32Builder *path_builder = new CUInt32Builder(pool) + CDoubleBuilder *cost_col = nullptr + CDoubleBuilder *gamma_col = nullptr + CDoubleBuilder *prob_col = nullptr + libpa.CInt32Builder *offset_builder = new libpa.CInt32Builder(pool) # Must be Int32 *not* UInt32 libpa.CUInt32Builder *o_col = new libpa.CUInt32Builder(pool) libpa.CUInt32Builder *d_col = new libpa.CUInt32Builder(pool) @@ -775,13 +776,21 @@ cdef class RouteChoiceSet: libpa.CResult[shared_ptr[libpa.CArray]] route_set_results - vector[long long].reverse_iterator riter - int offset = 0 + bint psl = (cost_set != nullptr and gamma_set != nullptr and prob_set != nullptr) + + # Origins, Destination, Route set, [Cost for route, Gamma for route, Probability for route] + columns.resize(6 if psl else 3) - columns.emplace_back(shared_ptr[libpa.CArray]()) # Origins - columns.emplace_back(shared_ptr[libpa.CArray]()) # Destination - columns.emplace_back(shared_ptr[libpa.CArray]()) # Route set + if psl: + cost_col = new CDoubleBuilder(pool) + gamma_col = new CDoubleBuilder(pool) + prob_col = new CDoubleBuilder(pool) + + for i in range(ods.size()): + cost_col.AppendValues(deref(deref(cost_set)[i])) + gamma_col.AppendValues(deref(deref(gamma_set)[i])) + prob_col.AppendValues(deref(deref(prob_set)[i])) for i in range(ods.size()): route_set = route_sets[i] @@ -791,18 +800,14 @@ cdef class RouteChoiceSet: for route in deref(route_set): o_col.Append(ods[i].first) d_col.Append(ods[i].second) + offset_builder.Append(offset) - # TODO: Pyarrows Cython API is incredibly lacking, it's just functional and doesn't include all the nice things the C++ API has - # One such thing its missing the AppendValues, which can add whole iterators at once in a much smarter fashion. 
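
The loop above computes the inverted path-sized logit form p_i = 1 / sum_j (gamma_j / gamma_i)^beta * exp(-theta * (c_j - c_i)), differencing the costs before exponentiating precisely to keep exp() in range. A vectorised sketch:

```python
import numpy as np

def psl_probabilities(cost, gamma, beta=1.0, theta=1.0):
    c = np.asarray(cost, dtype=float)
    g = np.asarray(gamma, dtype=float)
    # M[i, j] = (gamma_j / gamma_i)**beta * exp(-theta * (c_j - c_i))
    inv = ((g[None, :] / g[:, None]) ** beta
           * np.exp(-theta * (c[None, :] - c[:, None]))).sum(axis=1)
    return 1.0 / inv

p = psl_probabilities([10.0, 11.0, 15.0], [0.9, 0.8, 1.0])
assert abs(p.sum() - 1.0) < 1e-12  # a route set's probabilities sum to one
```
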
- # We'll have to reimport/extern the classes we use if we want to avoid the below - riter = route.rbegin() - while riter != route.rend(): - path_builder.Append(deref(riter)) - inc(riter) + path_builder.AppendValues(route.crbegin(), route.crend()) offset += route.size() path_builder.Finish(&paths) + offset_builder.Append(offset) # Mark the end of the array in offsets offset_builder.Finish(&offsets) @@ -812,12 +817,24 @@ cdef class RouteChoiceSet: d_col.Finish(&columns[1]) columns[2] = deref(route_set_results) - cdef shared_ptr[libpa.CTable] table = libpa.CTable.MakeFromArrays(libpa.pyarrow_unwrap_schema(RouteChoiceSet.schema), columns) + if psl: + cost_col.Finish(&columns[3]) + gamma_col.Finish(&columns[4]) + prob_col.Finish(&columns[5]) + + cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema(RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema) + cdef shared_ptr[libpa.CTable] table = libpa.CTable.MakeFromArrays(schema, columns) del path_builder del offset_builder del o_col del d_col + + if psl: + del cost_col + del gamma_col + del prob_col + return table diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 9d87873ca..b68f64d97 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -150,8 +150,8 @@ def test_round_trip(self): max_routes = 20 path = join(self.project.project_base_path, "batched results") - table = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) - rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1, where=path) + table = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path) dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) new_table = ( @@ -165,63 +165,40 @@ def test_round_trip(self): pd.testing.assert_frame_equal(table, new_table) - def test_frequency_results(self): + def test_cost_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table, freqs = rc.batched(nodes, max_routes=20, max_depth=10, freq_as_well=True) + table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = table.to_pandas() gb = table.groupby(by=["origin id", "destination id"]) - for od, freq in zip(set(nodes), freqs): # iteration order is changed by set operation - df = gb.get_group(od) - bincount = np.bincount(np.hstack(df["route set"].values)) - keys = bincount.nonzero()[0] - - vals = bincount[keys] - self.assertListEqual(list(keys), freq[0], "Keys of frequencies differreturns") - self.assertListEqual(list(vals), freq[1], "Values of frequencies differ") + for od, df in gb: + for route, cost in zip(df["route set"].values, df["cost"].values): + np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") - def test_cost_results(self): + def test_gamma_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table, costs = rc.batched(nodes, max_routes=20, max_depth=10, cost_as_well=True) + table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = table.to_pandas() gb = table.groupby(by=["origin id", "destination id"]) - for od, cost_vec in zip(set(nodes), costs): # iteration order is changed by set operation - df = 
gb.get_group(od) - for route, cost in zip(df["route set"].values, cost_vec): - np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") - - # def test_psl_results(self): - # np.random.seed(0) - # rc = RouteChoiceSet(self.graph) - # nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - # table, psls = rc.batched(nodes, max_routes=20, max_depth=10, psl_as_well=True) - # table = table.to_pandas() - - # gb = table.groupby(by=["origin id", "destination id"]) - # breakpoint() - # for od, cost_vec in zip(set(nodes), costs): # iteration order is changed by set operation - # df = gb.get_group(od) - # for route, cost in zip(df["route set"].values, cost_vec): - # np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + for od, df in gb: + self.assertTrue(all((df["gamma"] > 0) & (df["gamma"] <= 1))) def test_prob_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table, costs, psls, probs = rc.batched(nodes, max_routes=20, max_depth=10, prob_as_well=True, cost_as_well=True, psl_as_well=True) + table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = table.to_pandas() - for prob_vec in probs: - self.assertAlmostEqual(1.0, sum(prob_vec), msg="Probability not close to 1.0") - # for od, cost_vec in zip(set(nodes), costs): # iteration order is changed by set operation - # df = gb.get_group(od) - # for route, cost in zip(df["route set"].values, cost_vec): - # np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + gb = table.groupby(by=["origin id", "destination id"]) + for od, df in gb: + self.assertAlmostEqual(1.0, sum(df["probability"].values), msg="Probability not close to 1.0") + def generate_line_strings(project, graph, results): """Debug method""" From 1f195b873402fc76c0da7c054696a0791bfccf17 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 20 Feb 2024 15:26:16 +1000 Subject: [PATCH 25/33] Revert "Update CI workflows" This reverts commit e3ea0c67f0108d09645e52fa514e23edf2e12b71. 
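
Returning to `test_round_trip` above: results checkpointed with `where=` come back as a Hive-partitioned parquet dataset. A hedged sketch of the read-back, with `path` and `rc` assumed from the test fixture:

```python
import pyarrow as pa
import pyarrow.dataset  # makes pa.dataset available

# `path` and `rc` are assumed from the surrounding test fixture.
dataset = pa.dataset.dataset(path, format="parquet",
                             partitioning=pa.dataset.HivePartitioning(rc.schema))
new_table = (dataset.to_table()
             .to_pandas()
             .sort_values(by=["origin id", "destination id"])
             .reset_index(drop=True))
```
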
--- .github/workflows/build_linux.yml | 1 - .github/workflows/build_mac.yml | 1 - .github/workflows/debug_tests.yml | 1 - .github/workflows/documentation.yml | 1 - .github/workflows/test_linux_with_coverage.yml | 1 - .github/workflows/unit_tests.yml | 2 -- 6 files changed, 7 deletions(-) diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml index 51974e264..34713501d 100644 --- a/.github/workflows/build_linux.yml +++ b/.github/workflows/build_linux.yml @@ -17,7 +17,6 @@ jobs: - name: Install packages run: | python -m pip install --upgrade pip wheel cffi setuptools twine pyarrow - python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Build manylinux Python wheels uses: RalfG/python-wheels-manylinux-build@v0.7.1 diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml index 4ae1de2b5..401204b28 100644 --- a/.github/workflows/build_mac.yml +++ b/.github/workflows/build_mac.yml @@ -28,7 +28,6 @@ jobs: python -m pip install --upgrade pip setuptools wheel twine pip install -r requirements.txt pip install -r requirements_additional.txt - python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Build run: | diff --git a/.github/workflows/debug_tests.yml b/.github/workflows/debug_tests.yml index 8197b13af..15cae28c1 100644 --- a/.github/workflows/debug_tests.yml +++ b/.github/workflows/debug_tests.yml @@ -15,7 +15,6 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt - python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Download spatialite on Linux run: | diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 09c008be6..065c9ffd4 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -27,7 +27,6 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r docs/requirements-docs.txt - sudo python -c "import pyarrow; pyarrow.create_library_symlinks()" sudo apt update sudo apt install -y --fix-missing libsqlite3-mod-spatialite libspatialite-dev pandoc sudo ln -s /usr/lib/x86_64-linux-gnu/mod_spatialite.so /usr/lib/x86_64-linux-gnu/mod_spatialite diff --git a/.github/workflows/test_linux_with_coverage.yml b/.github/workflows/test_linux_with_coverage.yml index f9d87903b..38b5ae19f 100644 --- a/.github/workflows/test_linux_with_coverage.yml +++ b/.github/workflows/test_linux_with_coverage.yml @@ -23,7 +23,6 @@ jobs: pip3 install spatialite pip3 install pytest pip3 install pytest-cov - python3 -c "import pyarrow; pyarrow.create_library_symlinks()" sudo ln -s /usr/lib/x86_64-linux-gnu/mod_spatialite.so /usr/lib/x86_64-linux-gnu/mod_spatialite - name: Compile library run: | diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 3133ec982..5b6af38d9 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -19,7 +19,6 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt - python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Lint with ruff run: ruff . 
@@ -48,7 +47,6 @@ jobs: pip install -r requirements.txt pip install -r requirements_additional.txt pip install -r tests/requirements_tests.txt - python -c "import pyarrow; pyarrow.create_library_symlinks()" - name: Download spatialite on Linux if: matrix.os == 'ubuntu-latest' From b44721e41617fe163462921d2ccd36038e724c99 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 20 Feb 2024 15:34:21 +1000 Subject: [PATCH 26/33] Move the pyarrow symlink creation to setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 3fe13c06e..b68b463a6 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ if iutil.find_spec("pyarrow") is not None: import pyarrow as pa + pa.create_library_symlinks() include_dirs.append(pa.get_include()) libraries.extend(pa.get_libraries()) library_dirs.extend(pa.get_library_dirs()) From ab37eaf640451231956ed761515c4bf37be89fba Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Tue, 20 Feb 2024 11:44:17 +0000 Subject: [PATCH 27/33] CI --- .github/workflows/build_linux.yml | 76 ----------------------------- .github/workflows/build_mac.yml | 57 ---------------------- .github/workflows/build_wheels.yml | 58 ++++++++++++++++++++++ .github/workflows/build_windows.yml | 46 ----------------- pyproject.toml | 45 ++++++++++++++++- 5 files changed, 102 insertions(+), 180 deletions(-) delete mode 100644 .github/workflows/build_linux.yml delete mode 100644 .github/workflows/build_mac.yml create mode 100644 .github/workflows/build_wheels.yml delete mode 100644 .github/workflows/build_windows.yml diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml deleted file mode 100644 index 34713501d..000000000 --- a/.github/workflows/build_linux.yml +++ /dev/null @@ -1,76 +0,0 @@ -name: Build and upload Linux Python Package (includes QGIS artifacts) - -on: [pull_request, release] - -jobs: - deploy: - runs-on: ubuntu-20.04 - env: - HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} - continue-on-error: true - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.9 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install packages - run: | - python -m pip install --upgrade pip wheel cffi setuptools twine pyarrow - - - name: Build manylinux Python wheels - uses: RalfG/python-wheels-manylinux-build@v0.7.1 - with: - python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312' - pip-wheel-args: '--no-deps' - - - name: Moves wheels - run: | - mkdir -p dist - cp -v ./*-manylinux*.whl dist/ - -# - name: update versions to conform with QGIS -# run: | -# cd .github -# python qgis_requirements.py -# cd .. 
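
Patch 26 above relocates the symlink step into setup.py; stated on its own, the optional-dependency guard it extends looks like this (a sketch of the patched block, using only documented pyarrow helpers):

```python
import importlib.util as iutil

include_dirs, libraries, library_dirs = [], [], []

# Build against Arrow only when pyarrow is importable. create_library_symlinks()
# adds the unversioned shared-library symlinks the linker expects, a step every
# CI workflow previously had to run by hand.
if iutil.find_spec("pyarrow") is not None:
    import pyarrow as pa

    pa.create_library_symlinks()
    include_dirs.append(pa.get_include())
    libraries.extend(pa.get_libraries())
    library_dirs.extend(pa.get_library_dirs())
```
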
-# -# - name: Build manylinux Python wheels -# uses: RalfG/python-wheels-manylinux-build@v0.7.1 -# with: -# python-versions: 'cp39-cp39' -# pip-wheel-args: '--no-deps' -# -# - name: Moves wheels -# run: | -# mkdir -p dist -# cp -v ./*-manylinux*.whl dist/ - - - name: Stores artifacts along with the workflow result - if: ${{ github.event_name == 'push'}} - uses: actions/upload-artifact@v3 - with: - name: library - path: dist/*.whl - if-no-files-found: error # 'warn' or 'ignore' are also available, defaults to `warn` - - - name: Publish wheels to PyPI - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/*-manylinux*.whl - - - name: Save wheels to AWS - if: ${{ (env.HAS_SECRETS == 'true') }} - uses: jakejarvis/s3-sync-action@master - with: - args: --acl public-read --follow-symlinks - env: - AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_REGION: 'us-east-1' # optional: defaults to us-east-1 - SOURCE_DIR: 'dist/' # optional: defaults to entire repository - DEST_DIR: 'wheels/' # optional: defaults to entire repository diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml deleted file mode 100644 index 401204b28..000000000 --- a/.github/workflows/build_mac.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: Build and upload MacOS Python Package - -on: [pull_request, release] - -jobs: - deploy: - runs-on: macos-latest - env: - # Note that compiler options for macos-latest are documented here: - # https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md - HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} - CC: "gcc-12" - CXX: "g++-12" - continue-on-error: true - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - steps: - - uses: actions/checkout@v4 - - name: Set Python environment - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools wheel twine - pip install -r requirements.txt - pip install -r requirements_additional.txt - - - name: Build - run: | - python setup.py sdist bdist_wheel - - - name: Stores artifacts along with the workflow result - if: ${{ github.event_name == 'push'}} - uses: actions/upload-artifact@v3 - with: - name: library - path: dist/*.whl - if-no-files-found: error # 'warn' or 'ignore' are also available, defaults to `warn` - - - name: Publish - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/*.whl - - - name: Publish tar files - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') && (matrix.python-version == '3.11') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: twine upload dist/*.gz diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml new file mode 100644 index 000000000..086c8f1bb --- /dev/null +++ b/.github/workflows/build_wheels.yml @@ -0,0 +1,58 @@ +name: Build + +on: [pull_request] + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, 
windows-latest, macos-latest] + + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.5 + + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl + + make_sdist: + name: Make SDist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build SDist + run: pipx run build --sdist + + - uses: actions/upload-artifact@v4 + with: + name: cibw-sdist + path: dist/*.tar.gz + + upload_all: + needs: [build_wheels, make_sdist] + environment: pypi + permissions: + id-token: write + runs-on: ubuntu-latest + if: github.event_name == 'release' && github.event.action == 'published' + steps: + - uses: actions/download-artifact@v4 + with: + pattern: cibw-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml deleted file mode 100644 index 3d4677e74..000000000 --- a/.github/workflows/build_windows.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Build and upload Windows Python Package - -on: [pull_request, release] - -jobs: - deploy: - runs-on: windows-2019 - env: - HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} - continue-on-error: true - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - architecture: ['x64'] - steps: - - uses: actions/checkout@v4 - - name: Set Python environment - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: ${{ matrix.architecture }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools wheel twine - pip install -r requirements.txt - pip install -r requirements_additional.txt - - - name: Build - run: python setup.py bdist_wheel - - - - name: Stores artifacts along with the workflow result -# if: ${{ github.event_name == 'push'}} - uses: actions/upload-artifact@v3 - with: - name: library - path: dist/*.whl - if-no-files-found: error # 'warn' or 'ignore' are also available, defaults to `warn` - - - name: Publish - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: twine upload dist/*.whl diff --git a/pyproject.toml b/pyproject.toml index ff2ab1ff1..92281edb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ extend-exclude = '''docs/*''' [build-system] -requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"] +requires = ["setuptools", "numpy", "cython", "pandas", "pyarrow", "wheel"] [tool.ruff] @@ -55,3 +55,46 @@ target-version = "py39" [tool.ruff.mccabe] max-complexity = 20 + +[tool.cibuildwheel] +# While we test this facility we we will only build for 3.10 +#build = ["cp310-*"] +build = ["cp38-*", "cp39-*","cp310-*", "cp311-*", "cp312-*"] + +# We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows +skip = ["cp36-*", "cp37-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +test-skip = "" + +archs = ["auto"] +build-frontend = "default" +config-settings = {} +dependency-versions = "pinned" +environment = {} +environment-pass = [] +build-verbosity = 0 + +before-all = "" +before-build = "" 
+repair-wheel-command = "" + +test-command = "" +before-test = "" +test-requires = [] +test-extras = [] + +container-engine = "docker" + +manylinux-x86_64-image = "manylinux2014" +manylinux-aarch64-image = "manylinux2014" +manylinux-pypy_x86_64-image = "manylinux2014" +manylinux-pypy_aarch64-image = "manylinux2014" + +[tool.cibuildwheel.linux] +archs = ["auto", "aarch64"] +repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}" + +[tool.cibuildwheel.macos] +#archs = ["x86_64", "universal2", "arm64"] +archs = ["x86_64"] +environment = { CC="gcc-12", CXX="g++-12" } +repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" From 0df75659a9b284417833539fb949bfdbb41626c2 Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Tue, 20 Feb 2024 11:50:06 +0000 Subject: [PATCH 28/33] . --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 92281edb9..82d63dffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,10 @@ max-complexity = 20 [tool.cibuildwheel] # While we test this facility we we will only build for 3.10 #build = ["cp310-*"] -build = ["cp38-*", "cp39-*","cp310-*", "cp311-*", "cp312-*"] +build = ["cp39-*","cp310-*", "cp311-*", "cp312-*"] # We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows -skip = ["cp36-*", "cp37-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +skip = ["cp36-*", "cp37-*", "cp38-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] test-skip = "" archs = ["auto"] From ccb9cfa2d1c21baddee213e4c85ffae69598b469 Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Tue, 20 Feb 2024 12:00:19 +0000 Subject: [PATCH 29/33] . --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 82d63dffe..1c8551c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ manylinux-pypy_aarch64-image = "manylinux2014" [tool.cibuildwheel.linux] archs = ["auto", "aarch64"] +build = ["cp310-*", "cp311-*", "cp312-*"] repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}" [tool.cibuildwheel.macos] From 9fe62b88410b19630dc94fe4e9e47e993d96768c Mon Sep 17 00:00:00 2001 From: Pedro Camargo Date: Thu, 22 Feb 2024 22:07:41 -0600 Subject: [PATCH 30/33] OSM importer speedup (#506) * OSM Geo-processing using GeoPandas * OSM Geo-processing using GeoPandas * OSM Geo-processing using GeoPandas * OSM Geo-processing using GeoPandas * Processing with chunking * Processing with chunking * Processing with chunking * . * . * . * . * . * . * . * . * . * . * . * . * . * . * . * pandas deprecations * improves griding function * adds network cleaning * adds network cleaning * Allows for use of polygons with rings inside * Allows for use of polygons with rings inside * adjusts types * Update pyproject.toml Co-authored-by: Jake Moss * . * . 
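
The headline change in this patch is GeoPandas-based geo-processing, built on a new polygon-gridding helper (added below); a hedged usage sketch, with the import path assumed from the new module's location:

```python
from shapely.geometry import Polygon

# Import path assumed from the new file added in this patch.
from aequilibrae.project.network.osm.model_area_gridding import geometry_grid

area = Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])
grid = geometry_grid(area, 4326)  # GeoDataFrame of tiles clipped to `area`, EPSG:4326
assert abs(grid.geometry.unary_union.area - area.area) < 1e-9  # tiles cover the polygon
```
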
--------- Co-authored-by: pveigadecamargo Co-authored-by: Jake Moss --- aequilibrae/parameters.yml | 24 - aequilibrae/paths/graph.py | 6 +- aequilibrae/project/network/link_types.py | 6 +- aequilibrae/project/network/network.py | 13 +- .../network/osm/model_area_gridding.py | 26 + .../project/network/osm/osm_builder.py | 565 +++++++++--------- .../project/network/osm/osm_downloader.py | 40 +- aequilibrae/transit/map_matching_graph.py | 3 +- aequilibrae/utils/db_utils.py | 4 + requirements.txt | 3 +- .../project/data/porto_rico.parquet | Bin 0 -> 11961 bytes tests/aequilibrae/project/data/wynnum.parquet | Bin 0 -> 1626 bytes .../project/test_osm_downloader.py | 2 +- .../project/test_polygon_gridding.py | 22 + 14 files changed, 376 insertions(+), 338 deletions(-) create mode 100644 aequilibrae/project/network/osm/model_area_gridding.py create mode 100644 tests/aequilibrae/project/data/porto_rico.parquet create mode 100644 tests/aequilibrae/project/data/wynnum.parquet create mode 100644 tests/aequilibrae/project/test_polygon_gridding.py diff --git a/aequilibrae/parameters.yml b/aequilibrae/parameters.yml index d7fe1be3e..9529aa360 100644 --- a/aequilibrae/parameters.yml +++ b/aequilibrae/parameters.yml @@ -42,30 +42,6 @@ network: description: name osm_source: name type: text - - cycleway: - description: cycleway, both way - osm_source: cycleway - type: text - - cycleway_right: - description: cycleway, right - osm_source: cycleway:right - type: text - - cycleway_left: - description: cycleway, left - osm_source: cycleway:left - type: text - - busway: - description: busway - osm_source: busway - type: text - - busway_right: - description: busway, right - osm_source: busway:right - type: text - - busway_left: - description: busway, left - osm_source: busway:left - type: text two-way: - lanes: description: lanes diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 26e37d13c..2a75572c1 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -237,9 +237,9 @@ def _build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray): if nans: self.logger.warning(f"Field(s) {nans} has(ve) at least one NaN value. 
Check your computations") - df.loc[:, "b_node"] = df.b_node.values.astype(self.__integer_type) - df.loc[:, "id"] = df.id.values.astype(self.__integer_type) - df.loc[:, "link_id"] = df.link_id.values.astype(self.__integer_type) + df["link_id"] = df["link_id"].astype(self.__integer_type) + df["b_node"] = df.b_node.values.astype(self.__integer_type) + df["id"] = df.id.values.astype(self.__integer_type) df["direction"] = df.direction.values.astype(np.int8) return all_nodes, num_nodes, nodes_to_indices, fs, df diff --git a/aequilibrae/project/network/link_types.py b/aequilibrae/project/network/link_types.py index 05b3524ae..df3f91140 100644 --- a/aequilibrae/project/network/link_types.py +++ b/aequilibrae/project/network/link_types.py @@ -1,6 +1,7 @@ -from sqlite3 import IntegrityError, Connection -from aequilibrae.project.network.link_type import LinkType +from sqlite3 import IntegrityError + from aequilibrae.project.field_editor import FieldEditor +from aequilibrae.project.network.link_type import LinkType from aequilibrae.project.table_loader import TableLoader from aequilibrae.utils.db_utils import commit_and_close from aequilibrae.utils.spatialite_utils import connect_spatialite @@ -84,7 +85,6 @@ def new(self, link_type_id: str) -> LinkType: tp["link_type_id"] = link_type_id lt = LinkType(tp, self.project) self.__items[link_type_id] = lt - self.logger.warning("Link type has not yet been saved to the database. Do so explicitly") return lt def delete(self, link_type_id: str) -> None: diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index b9d5f8115..68c0a45fa 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -126,6 +126,7 @@ def create_from_osm( model_area: Optional[Polygon] = None, place_name: Optional[str] = None, modes=("car", "transit", "bicycle", "walk"), + clean=True, ) -> None: """ Downloads the network from Open-Street Maps @@ -141,6 +142,9 @@ def create_from_osm( **modes** (:obj:`tuple`, Optional): List of all modes to be downloaded. Defaults to the modes in the parameter file + **clean** (:obj:`bool`, Optional): Keeps only the links that intersects the model area polygon. Defaults to + True. Does not apply to networks downloaded with a place name + .. code-block:: python >>> from aequilibrae import Project @@ -191,6 +195,7 @@ def create_from_osm( raise ValueError("Coordinates out of bounds. 
Polygon must be in WGS84") west, south, east, north = model_area.bounds else: + clean = False bbox, report = placegetter(place_name) if bbox is None: msg = f'We could not find a reference for place name "{place_name}"' @@ -229,14 +234,14 @@ def create_from_osm( if subarea.intersects(model_area): polygons.append(subarea) self.logger.info("Downloading data") - self.downloader = OSMDownloader(polygons, modes, logger=self.logger) + dwnloader = OSMDownloader(polygons, modes, logger=self.logger) if pyqt: - self.downloader.downloading.connect(self.signal_handler) + dwnloader.downloading.connect(self.signal_handler) - self.downloader.doWork() + dwnloader.doWork() self.logger.info("Building Network") - self.builder = OSMBuilder(self.downloader.json, project=self.project, model_area=model_area) + self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area, clean=clean) if pyqt: self.builder.building.connect(self.signal_handler) diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py new file mode 100644 index 000000000..bcaa1a6a3 --- /dev/null +++ b/aequilibrae/project/network/osm/model_area_gridding.py @@ -0,0 +1,26 @@ +# Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html +from math import ceil + +import geopandas as gpd +import numpy as np +from shapely.geometry import Polygon + + +def geometry_grid(model_area, srid) -> gpd.GeoDataFrame: + minx, miny, maxx, maxy = model_area.bounds + # Some rough heuristic to get the number of points per sub-polygon in the 2 digits range + subd = ceil((len(model_area.exterior.coords) / 32) ** 0.5) + dx = (maxx - minx) / subd + dy = (maxy - miny) / subd + elements = [] + x1 = minx + for i in range(subd): + j1 = miny + for j in range(subd): + elements.append(Polygon([[x1, j1], [x1, j1 + dy], [x1 + dx, j1 + dy], [x1 + dx, j1]])) + j1 += dy + x1 += dx + + gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid) + + return gdf.clip(model_area) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index ecc10c222..af14827cb 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -1,50 +1,55 @@ +import geopandas as gpd import gc import importlib.util as iutil import string -from typing import List +from math import floor +from pathlib import Path +from typing import List, Tuple import numpy as np import pandas as pd -from shapely import Point +from pandas import json_normalize from shapely.geometry import Polygon from aequilibrae.context import get_active_project from aequilibrae.parameters import Parameters -from aequilibrae.project.network.haversine import haversine -from aequilibrae.project.network.link_types import LinkTypes +from aequilibrae.project.project_creation import remove_triggers, add_triggers from aequilibrae.utils import WorkerThread -from aequilibrae.utils.db_utils import commit_and_close +from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns from aequilibrae.utils.spatialite_utils import connect_spatialite +from .model_area_gridding import geometry_grid pyqt = iutil.find_spec("PyQt5") is not None if pyqt: from PyQt5.QtCore import pyqtSignal -if iutil.find_spec("qgis") is not None: - pass - class OSMBuilder(WorkerThread): if pyqt: building = pyqtSignal(object) - def __init__(self, osm_items: List, project, model_area: Polygon) -> None: + def __init__(self, data, 
project, model_area: Polygon, clean: bool) -> None: WorkerThread.__init__(self, None) + + project.logger.info("Preparing OSM builder") + self.__emit_all(["text", "Preparing OSM builder"]) + self.project = project or get_active_project() self.logger = self.project.logger - self.osm_items = osm_items - self.model_area = model_area + self.model_area = geometry_grid(model_area, 4326) self.path = self.project.path_to_file self.node_start = 10000 - self.__link_types = None # type: LinkTypes + self.clean = clean self.report = [] - self.__model_link_types = [] - self.__model_link_type_ids = [] - self.__link_type_quick_reference = {} - self.nodes = {} - self.node_df = [] - self.links = {} - self.insert_qry = """INSERT INTO {} ({}, geometry) VALUES({}, GeomFromText(?, 4326))""" + self.__all_ltp = pd.DataFrame([]) + self.__link_id = 1 + self.__valid_links = [] + + # Building shapely geometries makes the code surprisingly slower. + self.node_df = data["nodes"] + self.node_df.loc[:, "node_id"] = np.arange(self.node_start, self.node_start + self.node_df.shape[0]) + gc.collect() + self.links_df = data["links"] def __emit_all(self, *args): if pyqt: @@ -52,212 +57,183 @@ def __emit_all(self, *args): def doWork(self): with commit_and_close(connect_spatialite(self.path)) as conn: - self.__worksetup() - node_count = self.data_structures() - self.importing_links(node_count, conn) + self.__update_table_structure(conn) + self.importing_network(conn) + + self.logger.info("Cleaning things up") conn.execute( "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" ) + conn.commit() + self.__do_clean(conn) + self.__emit_all(["finished_threaded_procedure", 0]) - def data_structures(self): - self.logger.info("Separating nodes and links") - self.__emit_all(["text", "Separating nodes and links"]) - self.__emit_all(["maxValue", len(self.osm_items)]) - - alinks = [] - n = [] - tot_items = len(self.osm_items) - # When downloading data for entire countries, memory consumption can be quite intensive - # So we get rid of everything we don't need - for i in range(tot_items, 0, -1): - item = self.osm_items.pop(-1) - if item["type"] == "way": - alinks.append(item) - elif item["type"] == "node": - n.append(item) - self.__emit_all(["Value", tot_items - i]) - gc.collect() + def importing_network(self, conn): + self.logger.info("Importing the network") + node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count() - self.logger.info("Setting data structures for nodes") - self.__emit_all(["text", "Setting data structures for nodes"]) - self.__emit_all(["maxValue", len(n)]) - - self.node_df = [] - for i, node in enumerate(n): - nid = node.pop("id") - _ = node.pop("type") - node["node_id"] = i + self.node_start - node["inside_model"] = self.model_area.contains(Point(node["lon"], node["lat"])) - self.nodes[nid] = node - self.node_df.append([node["node_id"], nid, node["lon"], node["lat"]]) - self.__emit_all(["Value", i]) - del n - self.node_df = ( - pd.DataFrame(self.node_df, columns=["A", "B", "C", "D"]) - .drop_duplicates(subset=["C", "D"]) - .to_records(index=False) - ) - - self.logger.info("Setting data structures for links") - self.__emit_all(["text", "Setting data structures for links"]) - self.__emit_all(["maxValue", len(alinks)]) - - all_nodes = [] - for i, link in enumerate(alinks): - osm_id = link.pop("id") - _ = link.pop("type") - all_nodes.extend(link["nodes"]) - self.links[osm_id] = link - self.__emit_all(["Value", i]) - del alinks 
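
The node use counts assembled at the top of `importing_network` drive the segmentation that follows: a way is cut wherever an interior node is shared with another way. A self-contained sketch of that rule:

```python
import numpy as np

def split_way(nodes, use_count):
    """Cut a way into segments at nodes shared with other ways, mirroring the
    node_indices logic below. `use_count[n]` is how many ways reference node n."""
    counts = np.array([use_count[n] for n in nodes])
    if len(nodes) == 2 or counts[1:-1].max() == 1:
        return [nodes]                 # no interior intersections: keep one segment
    counts[0] = counts[-1] = 2         # endpoints always terminate a segment
    cuts = np.where(counts > 1)[0]
    return [nodes[i:j + 1] for i, j in zip(cuts[:-1], cuts[1:])]

# Node 3 is shared with a second way, so the way is cut there.
assert split_way([1, 2, 3, 4, 5], {1: 1, 2: 1, 3: 2, 4: 1, 5: 1}) == [[1, 2, 3], [3, 4, 5]]
```
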
- - self.logger.info("Finalizing data structures") - self.__emit_all(["text", "Finalizing data structures"]) - - node_count = self.unique_count(np.array(all_nodes)) - - return node_count - - def importing_links(self, node_count, conn): - node_ids = {} - - vars = {} - vars["link_id"] = 1 - table = "links" - fields = self.get_link_fields() - self.__update_table_structure(conn) - field_names = ",".join(fields) - - self.logger.info("Adding network nodes") - self.__emit_all(["text", "Adding network nodes"]) - sql = "insert into nodes(node_id, is_centroid, osm_id, geometry) Values(?, 0, ?, MakePoint(?,?, 4326))" - conn.executemany(sql, self.node_df) - conn.commit() - del self.node_df + self.node_df.osm_id = self.node_df.osm_id.astype(np.int64) + self.node_df.set_index(["osm_id"], inplace=True) + + self.__process_link_chunk() + shape_ = self.links_df.shape[0] + message_step = max(1, floor(shape_ / 100)) + self.__emit_all(["maxValue", shape_]) - self.logger.info("Adding network links") + self.logger.info("Geo-procesing links") self.__emit_all(["text", "Adding network links"]) - L = len(list(self.links.keys())) - self.__emit_all(["maxValue", L]) - - counter = 0 - mode_codes, not_found_tags = self.modes_per_link_type(conn) - owf, twf = self.field_osm_source() - all_attrs = [] - all_osm_ids = list(self.links.keys()) - for osm_id in all_osm_ids: - link = self.links.pop(osm_id) + geometries = [] + self.links_df.set_index(["osm_id"], inplace=True) + for counter, (idx, link) in enumerate(self.links_df.iterrows()): self.__emit_all(["Value", counter]) - counter += 1 - if counter % 1000 == 0: - self.logger.info(f"Creating segments from {counter:,} out of {L:,} OSM link objects") - vars["osm_id"] = osm_id - vars["link_type"] = "default" - linknodes = link["nodes"] - linktags = link["tags"] - - indices = np.searchsorted(node_count[:, 0], linknodes) - nodedegree = node_count[indices, 1] - - # Makes sure that beginning and end are end nodes for a link - nodedegree[0] = 2 - nodedegree[-1] = 2 - - intersections = np.where(nodedegree > 1)[0] - segments = intersections.shape[0] - 1 - - # Attributes that are common to all individual links/segments - vars["direction"] = (linktags.get("oneway") == "yes") * 1 - - for k, v in owf.items(): - vars[k] = linktags.get(v) - - for k, v in twf.items(): - val = linktags.get(v["osm_source"]) - if vars["direction"] == 0: - for d1, d2 in [("ab", "forward"), ("ba", "backward")]: - vars[f"{k}_{d1}"] = self.__get_link_property(d2, val, linktags, v) - elif vars["direction"] == -1: - vars[f"{k}_ba"] = linktags.get(f"{v['osm_source']}:{'backward'}", val) - elif vars["direction"] == 1: - vars[f"{k}_ab"] = linktags.get(f"{v['osm_source']}:{'forward'}", val) - - vars["modes"] = mode_codes.get(linktags.get("highway"), not_found_tags) - - vars["link_type"] = self.__link_type_quick_reference.get( - vars["link_type"].lower(), self.__repair_link_type(vars["link_type"]) - ) + if counter % message_step == 0: + self.logger.info(f"Creating segments from {counter:,} out of {shape_ :,} OSM link objects") + + # How can I link have less than two points? + if not isinstance(link["nodes"], list): + self.logger.debug(f"OSM link/feature {idx} does not have a list of nodes.") + continue + + if len(link["nodes"]) < 2: + self.logger.debug(f"Link {idx} has less than two nodes. 
{link.nodes}") + continue + + # The link is a straight line between two points + # Or all midpoints are only part of a single link + node_indices = node_count.loc[link["nodes"], "counter"].to_numpy() + if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1: + # The link has no intersections + geometries.append([idx, self._build_geometry(link.nodes)]) + else: + # Make sure we get the first and last nodes, as they are certainly the extremities of the sublinks + node_indices[0] = 2 + node_indices[-1] = 2 + # The link has intersections + # We build repeated records for links when they have intersections + # This is because it is faster to do this way and then have all the data repeated + # when doing the join with the link fields below + intersecs = np.where(node_indices > 1)[0] + for i, j in zip(intersecs[:-1], intersecs[1:]): + geometries.append([idx, self._build_geometry(link.nodes[i : j + 1])]) + + # Builds the link Geo dataframe + self.links_df.drop(columns=["nodes"], inplace=True) + # We build a dataframe with the geometries created above + # and join with the database + geo_df = pd.DataFrame(geometries, columns=["link_id", "geometry"]).set_index("link_id") + self.links_df = self.links_df.join(geo_df, how="inner") + + self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 + + self.node_df = self.node_df.reset_index() + + # Saves the data to disk in case of issues loading it to the database + osm_data_path = Path(self.project.project_base_path) / "osm_data" + osm_data_path.mkdir(exist_ok=True) + self.links_df.to_parquet(osm_data_path / "links.parquet") + self.node_df.to_parquet(osm_data_path / "nodes.parquet") + + self.logger.info("Adding nodes to file") + self.__emit_all(["text", "Adding nodes to file"]) + + # Removing the triggers before adding all nodes makes things a LOT faster + remove_triggers(conn, self.logger, "network") + + cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types", "lon", "lat"] + insert_qry = f"INSERT INTO nodes ({','.join(cols[:-2])}, geometry) VALUES(?,?,?,?,?, MakePoint(?,?, 4326))" + conn.executemany(insert_qry, self.node_df[cols].to_records(index=False)) - if len(vars["modes"]) > 0: - for i in range(segments): - attributes = self.__build_link_data(vars, intersections, i, linknodes, node_ids, fields) - if attributes is None: - continue - all_attrs.append(attributes) - vars["link_id"] += 1 - - self.__emit_all(["text", f"{counter:,} of {L:,} super links added"]) - self.links[osm_id] = [] - sql = self.insert_qry.format(table, field_names, ",".join(["?"] * (len(all_attrs[0]) - 1))) - self.logger.info("Adding network links") - self.__emit_all(["text", "Adding network links"]) - try: - conn.executemany(sql, all_attrs) - except Exception as e: - self.logger.error("error when inserting link {}. 
Error {}".format(all_attrs[0], e.args)) - self.logger.error(sql) - raise e - - def __worksetup(self): - self.__link_types = self.project.network.link_types - lts = self.__link_types.all_types() - for lt_id, lt in lts.items(): - self.__model_link_types.append(lt.link_type) - self.__model_link_type_ids.append(lt_id) + del self.node_df + gc.collect() - def __update_table_structure(self, conn): - structure = conn.execute("pragma table_info(Links)").fetchall() - has_fields = [x[1].lower() for x in structure] - fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"] - for field in [f for f in fields if f not in has_fields]: - ltype = self.get_link_field_type(field).upper() - conn.execute(f"Alter table Links add column {field} {ltype}") + # But we need to add them back to add the links + add_triggers(conn, self.logger, "network") + + # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") + + # I could not get the above line to work, so I used the following code instead + self.links_df.index.name = "osm_id" + self.links_df.reset_index(inplace=True) + insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))" + cols_no_geo = self.links_df.columns.tolist() + cols_no_geo.remove("geometry") + insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo))) + + cols = cols_no_geo + ["geometry"] + links_df = self.links_df[cols].to_records(index=False) + + del self.links_df + gc.collect() + self.logger.info("Adding links to file") + self.__emit_all(["text", "Adding links to file"]) + conn.executemany(insert_qry, links_df) + + def _build_geometry(self, nodes: List[int]) -> str: + slice = self.node_df.loc[nodes, :] + txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist()) + return f"LINESTRING({txt})" + + def __do_clean(self, conn): + if not self.clean: + conn.execute("VACUUM;") + return + self.logger.info("Cleaning up the network down to the selected area") + links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326) + existing_link_ids = gpd.sjoin(links, self.model_area, how="left").dropna().link_id.to_numpy() + to_delete = [[x] for x in links[~links.link_id.isin(existing_link_ids)].link_id] + conn.executemany("DELETE FROM links WHERE link_id = ?", to_delete) conn.commit() + conn.execute("VACUUM;") + + def __process_link_chunk(self): + self.logger.info("Processing link modes, types and fields") + self.__emit_all(["text", "Processing link modes, types and fields"]) + + # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk + # And let's also assume that each row will be 200 fields at 8 bytes each + # This makes 2Gb roughly equal to 2.6 million rows, so 2 million would so. 
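
The sizing comment above, worked through under its own assumptions (200 tag fields at 8 bytes per row):

```python
# 200 fields * 8 bytes = 1,600 bytes per row; at 1,000,000 rows per chunk
# that is roughly 1.5 GiB, inside the ~1-2 GB per-chunk budget described above.
rows_per_chunk = 1_000_000
bytes_per_row = 200 * 8
print(f"{rows_per_chunk * bytes_per_row / 2**30:.2f} GiB per chunk")  # ~1.49
```
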
+ chunk_size = 1_000_000 + list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + self.links_df = pd.DataFrame([]) + # Initialize link types + with read_and_close(self.project.path_to_file) as conn: + self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) + self.__emit_all(["maxValue", len(list_dfs)]) + for i, df in enumerate(list_dfs): + self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}") + self.__emit_all(["Value", i]) + if "tags" in df.columns: + # It is critical to reset the index for the concat below to work + df.reset_index(drop=True, inplace=True) + df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) + df.columns = [x.replace(":", "_") for x in df.columns] + df = self.__build_link_types(df) + df = self.__establish_modes_for_all_links(conn, df) + df = self.__process_link_attributes(df) + else: + self.logger.error("OSM link data does not have tags. Skipping an entire data chunk") + df = pd.DataFrame([]) + list_dfs[i] = df + self.links_df = pd.concat(list_dfs, ignore_index=True) + + def __build_link_types(self, df): + data = [] + df = df.fillna(value={"highway": "missing"}) + df.highway = df.highway.str.lower() + for i, lt in enumerate(df.highway.unique()): + if str(lt) in self.__all_ltp.highway.values: + continue + data.append([*self.__define_link_type(str(lt)), str(lt)]) + self.__all_ltp = pd.concat( + [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])] + ) + self.__all_ltp.drop_duplicates(inplace=True) + df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") + return df.drop(columns=["highway"]) - def __build_link_data(self, vars, intersections, i, linknodes, node_ids, fields): - ii = intersections[i] - jj = intersections[i + 1] - all_nodes = [linknodes[x] for x in range(ii, jj + 1)] - - vars["a_node"] = node_ids.get(linknodes[ii], self.node_start) - if vars["a_node"] == self.node_start: - node_ids[linknodes[ii]] = vars["a_node"] - self.node_start += 1 - - vars["b_node"] = node_ids.get(linknodes[jj], self.node_start) - if vars["b_node"] == self.node_start: - node_ids[linknodes[jj]] = vars["b_node"] - self.node_start += 1 - - vars["distance"] = sum( - [ - haversine(self.nodes[x]["lon"], self.nodes[x]["lat"], self.nodes[y]["lon"], self.nodes[y]["lat"]) - for x, y in zip(all_nodes[1:], all_nodes[:-1]) - ] - ) - - geometry = ["{} {}".format(self.nodes[x]["lon"], self.nodes[x]["lat"]) for x in all_nodes] - inside_area = sum([self.nodes[x]["inside_model"] for x in all_nodes]) - if inside_area == 0: - return None - geometry = "LINESTRING ({})".format(", ".join(geometry)) - - attributes = [vars.get(x) for x in fields] - attributes.append(geometry) - return attributes - - def __repair_link_type(self, link_type: str) -> str: + def __define_link_type(self, link_type: str) -> Tuple[str, str]: + proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() @@ -266,53 +242,110 @@ def __repair_link_type(self, link_type: str) -> str: if piece in ["link", "segment", "stretch"]: link_type = "_".join(split[0 : i + 1]) + if self.__all_ltp.shape[0] >= 51: + link_type = "aggregate_link_type" + if len(link_type) == 0: link_type = "empty" - if len(self.__model_link_type_ids) >= 51 and link_type not in self.__model_link_types: - link_type = "aggregate_link_type" - - if link_type in self.__model_link_types: - 
-            lt = self.__link_types.get_by_name(link_type)
+        if link_type in self.__all_ltp.link_type.values:
+            lt = proj_link_types.get_by_name(link_type)
             if original_link_type not in lt.description:
                 lt.description += f", {original_link_type}"
                 lt.save()
-            self.__link_type_quick_reference[original_link_type.lower()] = link_type
-            return link_type
+            return [lt.link_type_id, link_type]

         letter = link_type[0]
-        if letter in self.__model_link_type_ids:
+        if letter in self.__all_ltp.link_type_id.values:
             letter = letter.upper()
-            if letter in self.__model_link_type_ids:
+            if letter in self.__all_ltp.link_type_id.values:
                 for letter in string.ascii_letters:
-                    if letter not in self.__model_link_type_ids:
+                    if letter not in self.__all_ltp.link_type_id.values:
                         break
-        lt = self.__link_types.new(letter)
+        lt = proj_link_types.new(letter)
         lt.link_type = link_type
         lt.description = f"Link types from Open Street Maps: {original_link_type}"
         lt.save()
-        self.__model_link_types.append(link_type)
-        self.__model_link_type_ids.append(letter)
-        self.__link_type_quick_reference[original_link_type.lower()] = link_type
-        return link_type
+        return [letter, link_type]

-    def __get_link_property(self, d2, val, linktags, v):
-        vald = linktags.get(f'{v["osm_source"]}:{d2}', val)
-        if vald is None:
-            return vald
+    def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFrame:
+        p = Parameters()
+        modes = p.parameters["network"]["osm"]["modes"]

-        if vald.isdigit():
-            if vald == val and v["osm_behaviour"] == "divide":
-                vald = float(val) / 2
-        return vald
+        mode_codes = conn.execute("SELECT mode_name, mode_id from modes").fetchall()
+        mode_codes = {p[0]: p[1] for p in mode_codes}

-    @staticmethod
-    def unique_count(a):
-        # From: https://stackoverflow.com/a/21124789/1480643
-        unique, inverse = np.unique(a, return_inverse=True)
-        count = np.zeros(len(unique), int)
-        np.add.at(count, inverse, 1)
-        return np.vstack((unique, count)).T
+        type_list = {}
+        notfound = ""
+        for mode, val in modes.items():
+            all_types = val["link_types"]
+            md = mode_codes[mode]
+            for tp in all_types:
+                type_list[tp] = "".join(sorted("{}{}".format(type_list.get(tp, ""), md)))
+            if val["unknown_tags"]:
+                notfound += md
+
+        type_list = {k: "".join(set(v)) for k, v in type_list.items()}
+
+        df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"])
+        df = df.merge(df_aux, on="link_type", how="left").fillna(value={"modes": "".join(sorted(notfound))})
+        return df
+
+    def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame:
+        df = df.assign(direction=0, link_id=0)
+        if "oneway" in df.columns:
+            df.loc[df.oneway == "yes", "direction"] = 1
+            df.loc[df.oneway == "backward", "direction"] = -1
+        p = Parameters()
+        fields = p.parameters["network"]["links"]["fields"]
+
+        for x in fields["one-way"]:
+            if "link_type" in x.keys():
+                continue
+            keys_ = list(x.values())[0]
+            field = list(x.keys())[0]
+            osm_name = keys_.get("osm_source", field).replace(":", "_")
+            df.rename(columns={osm_name: field}, inplace=True, errors="ignore")
+
+        for x in fields["two-way"]:
+            keys_ = list(x.values())[0]
+            field = list(x.keys())[0]
+            if "osm_source" not in keys_:
+                continue
+            osm_name = keys_.get("osm_source", field).replace(":", "_")
+            if osm_name not in df.columns:
+                continue
+            df[f"{field}_ba"] = df[osm_name].copy()
+            df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore")
+            if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide":
+                df[f"{field}_ab"] = pd.to_numeric(df[f"{field}_ab"], errors="coerce")
+                df[f"{field}_ba"] = pd.to_numeric(df[f"{field}_ba"], errors="coerce")
+
+                # Divides the values by 2 or zeroes them depending on the link direction
+                df.loc[df.direction == 0, f"{field}_ab"] /= 2
+                df.loc[df.direction == -1, f"{field}_ab"] = 0
+
+                df.loc[df.direction == 0, f"{field}_ba"] /= 2
+                df.loc[df.direction == 1, f"{field}_ba"] = 0
+
+            if f"{field}_forward" in df:
+                fld = pd.to_numeric(df[f"{field}_forward"], errors="coerce")
+                df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0]
+            if f"{field}_backward" in df:
+                fld = pd.to_numeric(df[f"{field}_backward"], errors="coerce")
+                df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0]
+        cols = list_columns(self.project.conn, "links") + ["nodes"]
+        return df[[x for x in cols if x in df.columns]]
+
+    ######## TABLE STRUCTURE UPDATING ########
+    def __update_table_structure(self, conn):
+        structure = conn.execute("pragma table_info(Links)").fetchall()
+        has_fields = [x[1].lower() for x in structure]
+        fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"]
+        for field in [f for f in fields if f not in has_fields]:
+            ltype = self.get_link_field_type(field).upper()
+            conn.execute(f"Alter table Links add column {field} {ltype}")
+        conn.commit()

     @staticmethod
     def get_link_fields():
@@ -339,51 +372,3 @@ def get_link_field_type(field_name):
         for tp in fields["one-way"]:
             if field_name in tp:
                 return tp[field_name]["type"]
-
-    @staticmethod
-    def field_osm_source():
-        p = Parameters()
-        fields = p.parameters["network"]["links"]["fields"]
-
-        owf = {
-            list(x.keys())[0]: x[list(x.keys())[0]]["osm_source"]
-            for x in fields["one-way"]
-            if "osm_source" in x[list(x.keys())[0]]
-        }
-
-        twf = {}
-        for x in fields["two-way"]:
-            if "osm_source" in x[list(x.keys())[0]]:
-                twf[list(x.keys())[0]] = {
-                    "osm_source": x[list(x.keys())[0]]["osm_source"],
-                    "osm_behaviour": x[list(x.keys())[0]]["osm_behaviour"],
-                }
-        return owf, twf
-
-    def modes_per_link_type(self, conn):
-        p = Parameters()
-        modes = p.parameters["network"]["osm"]["modes"]
-
-        mode_codes = conn.execute("SELECT mode_name, mode_id from modes").fetchall()
-        mode_codes = {p[0]: p[1] for p in mode_codes}
-
-        type_list = {}
-        notfound = ""
-        for mode, val in modes.items():
-            all_types = val["link_types"]
-            md = mode_codes[mode]
-            for tp in all_types:
-                type_list[tp] = "{}{}".format(type_list.get(tp, ""), md)
-            if val["unknown_tags"]:
-                notfound += md
-
-        type_list = {k: "".join(set(v)) for k, v in type_list.items()}
-
-        return type_list, "{}".format(notfound)
-
-    @staticmethod
-    def get_node_fields():
-        p = Parameters()
-        fields = p.parameters["network"]["nodes"]["fields"]
-        fields = [list(x.keys())[0] for x in fields]
-        return fields + ["osm_id"]
diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py
index 76c810b1e..58b3fd6c9 100644
--- a/aequilibrae/project/network/osm/osm_downloader.py
+++ b/aequilibrae/project/network/osm/osm_downloader.py
@@ -10,20 +10,21 @@
 For the original work, please see https://github.com/gboeing/osmnx
 """

+import gc
+import importlib.util as iutil
 import logging
-import time
 import re
-from typing import List
+import time
+from typing import List, Dict

+import pandas as pd
 import requests
 from shapely import Polygon

-from .osm_params import http_headers, memory
-from aequilibrae.parameters import Parameters
 from aequilibrae.context import get_logger
+from aequilibrae.parameters import Parameters
 from aequilibrae.utils import WorkerThread
-import gc
-import importlib.util as iutil
+from .osm_params import http_headers, memory

 spec = iutil.find_spec("PyQt5")
 pyqt = spec is not None
@@ -50,6 +51,9 @@ def __init__(self, polygons: List[Polygon], modes, logger: logging.Logger = None
         self.overpass_endpoint = par["overpass_endpoint"]
         self.timeout = par["timeout"]
         self.sleeptime = par["sleeptime"]
+        self._nodes = []
+        self._links = []
+        self.data: Dict[str, pd.DataFrame] = {"nodes": pd.DataFrame([]), "links": pd.DataFrame([])}

     def doWork(self):
         infrastructure = 'way["highway"]'
@@ -64,7 +68,7 @@ def doWork(self):
             m = f"[maxsize: {memory}]"
         for counter, poly in enumerate(self.polygons):
             msg = f"Downloading polygon {counter + 1} of {len(self.polygons)}"
-            self.logger.debug(msg)
+            self.logger.info(msg)
             self.__emit_all(["Value", counter])
             self.__emit_all(["text", msg])
             west, south, east, north = poly.bounds
@@ -80,10 +84,26 @@ def doWork(self):
             )
             json = self.overpass_request(data={"data": query_str}, timeout=self.timeout)
             if json["elements"]:
-                self.json.extend(json["elements"])
-            del json
-            gc.collect()
+                for tag, lst in [("node", self._nodes), ("way", self._links)]:
+                    df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag])
+                    if tag == "node":
+                        df = df.assign(is_centroid=0, modes="", link_types="", node_id=0)
+                    lst.append(df)
+            del json
+            gc.collect()
+        self.__emit_all(["Value", len(self.polygons)])
+        self.__emit_all(["text", "Downloading finished. Processing data"])
+
+        for lst, table in [(self._links, "links"), (self._nodes, "nodes")]:
+            df = pd.DataFrame([])
+            if len(lst) > 0:
+                df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"])
+                if table != "links":
+                    df = df.drop(columns=["tags"], errors="ignore")
+            self.data[table] = df.rename(columns={"id": "osm_id"}, errors="ignore")
+            lst.clear()
+            gc.collect()
+
         self.__emit_all(["FinishedDownloading", 0])

     def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_duration=None):
diff --git a/aequilibrae/transit/map_matching_graph.py b/aequilibrae/transit/map_matching_graph.py
index a1ab70418..56e5610d8 100644
--- a/aequilibrae/transit/map_matching_graph.py
+++ b/aequilibrae/transit/map_matching_graph.py
@@ -118,8 +118,7 @@ def __build_graph_from_cache(self):

     def __build_graph_from_scratch(self):
         self.logger.info(f"Creating map-matching graph from scratch for mode_id={self.mode_id}")
-        self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=np.nan)
-        self.df.loc[:, "geo"] = self.df.wkt.apply(shapely.wkt.loads)
+        self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=self.df.wkt.apply(shapely.wkt.loads))
         self.df.loc[self.df.link_id < 0, "link_id"] = self.df.link_id * -1 + self.df.link_id.max() + 1

         # We make sure all link IDs are in proper order
diff --git a/aequilibrae/utils/db_utils.py b/aequilibrae/utils/db_utils.py
index 23534138f..83efb49a2 100644
--- a/aequilibrae/utils/db_utils.py
+++ b/aequilibrae/utils/db_utils.py
@@ -79,6 +79,10 @@ def get_schema(conn, table_name):
     return {e.name: e for e in rv}


+def list_columns(conn, table_name):
+    return list(get_schema(conn, table_name).keys())
+
+
 def has_column(conn, table_name, col_name):
     return col_name in get_schema(conn, table_name)

diff --git a/requirements.txt b/requirements.txt
index d22ed0b9d..04296117f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@ shapely
 pandas
 pyproj
 rtree
-openmatrix
\ No newline at end of file
+openmatrix
+geopandas
\ No newline at end of file
diff --git a/tests/aequilibrae/project/data/porto_rico.parquet b/tests/aequilibrae/project/data/porto_rico.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..97dfe198c23b20bad43fe123f8dadf8c091a0ee2
GIT binary patch
literal 11961
[base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/tests/aequilibrae/project/data/wynnum.parquet b/tests/aequilibrae/project/data/wynnum.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9c9eeb46c11259cc0352d909f48fb7536c569a0c
GIT binary patch
literal 1626
[base85-encoded binary data omitted]
diff --git a/tests/aequilibrae/project/test_polygon_gridding.py b/tests/aequilibrae/project/test_polygon_gridding.py
new file mode 100644
index 000000000..21dd40562
--- /dev/null
+++ b/tests/aequilibrae/project/test_polygon_gridding.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+import geopandas as gpd
+import pytest
+
+from aequilibrae.project.network.osm.model_area_gridding import geometry_grid
+
+
+def test_geometry_grid():
+    pth = Path(__file__).parent / "data"
+
+    # Small simple polygon
+    polygon = gpd.read_parquet(pth / "wynnum.parquet").geometry[0]
+    grid = geometry_grid(polygon, 4326)
+    assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001)
+    assert grid.shape[0] == 1
+
+    # Bigger polygon
+    polygon = gpd.read_parquet(pth / "porto_rico.parquet").geometry[0]
+    grid = geometry_grid(polygon, 4326)
+    assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001)
+    assert grid.shape[0] == 16

From 77edeae154a42dedb764ae4b2e1f32accdd3676c Mon Sep 17 00:00:00 2001
From: Pedro Camargo
Date: Sun, 25 Feb 2024 10:24:05 +1000
Subject: [PATCH 31/33] Simply bumping up the version for the next release
 (#512)

* Removes legacy code

* updates versions

---------

Co-authored-by: pveigadecamargo
---
 __version__.py                    | 3 ++-
 docs/source/_static/switcher.json | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/__version__.py b/__version__.py
index f68a650ed..6519541e0 100644
--- a/__version__.py
+++ b/__version__.py
@@ -1,5 +1,6 @@
+# When updating the version, one must also update the docs/source/_static/switcher.json file
 version = 1.0
-minor_version = "0"
+minor_version = "1"
 release_name = "Rio de Janeiro"

 release_version = f"{version}.{minor_version}"
diff --git a/docs/source/_static/switcher.json b/docs/source/_static/switcher.json
index 38c10b201..859aa1afc 100644
--- a/docs/source/_static/switcher.json
+++ b/docs/source/_static/switcher.json
@@ -4,6 +4,11 @@
         "version": "develop",
         "url": "https://aequilibrae.com/python/develop/"
     },
+    {
+        "name": "1.0.1",
+        "version": "1.0.1",
+        "url": "https://aequilibrae.com/python/V.1.0.1/"
+    },
     {
         "name": "1.0.0",
         "version": "1.0.0",

From 0bb151b0cffe02fb9eb583d0e3f33a068ad0354c Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Tue, 27 Feb 2024 11:29:15 +1000
Subject: [PATCH 32/33] Squashed commit of the following:

commit 9337fb611463606b0ca89d1270210fa7fdf46714
Author: Jake-Moss
Date:   Tue Feb 27 11:22:48 2024 +1000

    .

commit 3f2c01b68805db1e8c9cf0d6821b8333bf6a5007
Author: Jake-Moss
Date:   Tue Feb 27 10:09:51 2024 +1000

    I give up

commit c77d5e675f3051cce295b7a950f23a9abddee69b
Author: Jake-Moss
Date:   Tue Feb 27 10:03:52 2024 +1000

    .

commit 9dc365042b80d80bb90c89e0424a569816a9dc78
Author: Jake-Moss
Date:   Tue Feb 27 09:58:40 2024 +1000

    .

commit b4b945c6449fb8cf98b0b84a16f3c1111946fe9f
Author: Jake-Moss
Date:   Tue Feb 27 09:53:10 2024 +1000

    .

commit e0c32f273a308bb2c3288a83c4b9287800200044
Author: Jake-Moss
Date:   Tue Feb 27 09:38:29 2024 +1000

    .

commit eedb859e5d1be8a76750106f63b95afa593641b4
Author: Jake-Moss
Date:   Tue Feb 27 09:30:20 2024 +1000

    .

commit d0def1127ae3b2b10a067637cb539d60656cf047
Author: Jake-Moss
Date:   Tue Feb 27 09:27:33 2024 +1000

    .
commit a6b2a8e1235c1bb21ee1aa21d38e7ea122686e35
Merge: 0548be6d 77edeae1
Author: Jake Moss
Date:   Tue Feb 27 09:19:30 2024 +1000

    Merge branch 'develop' into pedro/ci_test

commit 0548be6d06cb6796e6d4ed037a5391d15c02d4b4
Author: Jake-Moss
Date:   Tue Feb 27 09:13:07 2024 +1000

    Macos test

commit a10c7914f93c4ed6e506b88495557df44b5c9029
Author: Jake-Moss
Date:   Wed Feb 21 20:00:01 2024 +1000

    Maybe fix MacOS again

commit cad0579957150f63bebd85fbe29f0e1b4ffc981f
Author: Jake-Moss
Date:   Wed Feb 21 19:43:00 2024 +1000

    Maybe MacOS fix

commit 24de8f78c3946ddf4a68c05e0be4412d80ca68ca
Author: Jake-Moss
Date:   Wed Feb 21 19:31:08 2024 +1000

    Hopefully fix CI

commit 66ffcc3a724fd7d331a853159bfedd974f745b32
Author: pveigadecamargo
Date:   Wed Feb 21 17:01:57 2024 +1000

    .

commit f934c34c3b350538fb86742289cbaeaba6600c03
Author: pveigadecamargo
Date:   Wed Feb 21 16:41:36 2024 +1000

    .

commit 9f2ca2000b5ed15951f998146a9bb9962cca8bb8
Author: pveigadecamargo
Date:   Wed Feb 21 16:30:59 2024 +1000

    .

commit ba0d882bb93028edb18e5ad17939a4997a58ee9a
Author: pveigadecamargo
Date:   Wed Feb 21 16:27:14 2024 +1000

    .

commit 315cbce6dde462ca14732978352841c928a859ca
Author: Pedro Camargo
Date:   Wed Feb 21 01:01:23 2024 +1000

    Update pyproject.toml

commit d58e7cc9611d1c0f76c917b03f285c3bb6e37b4a
Author: Pedro Camargo
Date:   Wed Feb 21 00:58:54 2024 +1000

    Update pyproject.toml

commit dd6723fd458b58b743149d5e3ca9b818d7ec60c2
Author: pveigadecamargo
Date:   Tue Feb 20 23:02:05 2024 +1000

    .

commit f7ae37e93e028f3d864329b1089832e14b8a9202
Author: pveigadecamargo
Date:   Tue Feb 20 22:58:19 2024 +1000

    .

commit 0f954e422a555f0718d4f28e91c838bf8decbd66
Author: pveigadecamargo
Date:   Tue Feb 20 22:54:21 2024 +1000

    .

commit 3dafd886d9b9d3ec5aded840ebace0a4d9ca4c61
Author: pveigadecamargo
Date:   Tue Feb 20 22:27:05 2024 +1000

    .

commit ebe3a192f4e2fb2bddd71aad4474f7977ba5bde0
Author: pveigadecamargo
Date:   Tue Feb 20 22:12:39 2024 +1000

    .

commit 9f4413e029a3d9036a0e2d2505ac12bafcdbb948
Merge: daf48a58 2e3c2340
Author: pveigadecamargo
Date:   Tue Feb 20 22:11:22 2024 +1000

    Merge branch 'develop' of github.com:AequilibraE/aequilibrae into pedro/ci_test

commit daf48a58cfd66f346a1ac725575968cb3d03fe14
Merge: 3df73da2 ccb9cfa2
Author: pveigadecamargo
Date:   Tue Feb 20 22:11:13 2024 +1000

    .

commit 3df73da2bd007278208767d74b8cefaadfefbb3c
Author: pveigadecamargo
Date:   Tue Feb 20 22:10:13 2024 +1000

    .
commit 9448e76440d83988e3a1356d3cbeaf125b64bd92
Author: pveigadecamargo
Date:   Tue Feb 20 22:10:05 2024 +1000

    adds emulation

commit c9f2aaa7a9078061f4572704acf563010b70cc2f
Author: pveigadecamargo
Date:   Tue Feb 20 21:20:12 2024 +1000

    adds emulation

commit b2d5d3dc52bfdb25582dd1ac8813dd92fac571f0
Author: pveigadecamargo
Date:   Tue Feb 20 21:16:51 2024 +1000

    adds emulation

commit 2458f9ae3524f427012444e413cfec0077d6503c
Author: pveigadecamargo
Date:   Tue Feb 20 21:02:26 2024 +1000

    adds emulation

commit e9e660b00172a03a40a30d51d5023e0dd75a5624
Author: pveigadecamargo
Date:   Tue Feb 20 20:54:57 2024 +1000

    adds emulation

commit f112262230e7dc5c734439fedca5f62d568b561d
Author: pveigadecamargo
Date:   Tue Feb 20 20:42:13 2024 +1000

    Add ARM architectures for Linux and mac

commit bae7d0d224bbcb1ef9db30da25338bf0b07357f2
Author: pveigadecamargo
Date:   Tue Feb 20 20:28:15 2024 +1000

    tests cibuildwheels

commit 293457a22a398988ca2ab0c454a55c13eaa2d3eb
Author: pveigadecamargo
Date:   Tue Feb 20 20:09:43 2024 +1000

    tests cibuildwheels

commit 06b6a448f2e9f9d273c95f968d399df73d9a33be
Author: pveigadecamargo
Date:   Tue Feb 20 19:31:21 2024 +1000

    tests cibuildwheels

commit 0ebce09a916c0ad5e9a471f1fc38f6fd47820d3f
Author: pveigadecamargo
Date:   Tue Feb 20 19:23:39 2024 +1000

    tests cibuildwheels
---
 .github/workflows/debug_tests.yml              |  2 +-
 .github/workflows/documentation.yml            | 12 ++++++------
 .github/workflows/test_linux_with_coverage.yml |  2 +-
 .github/workflows/unit_tests.yml               |  2 +-
 pyproject.toml                                 | 16 +++++++++-------
 requirements.txt                               |  2 +-
 6 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/debug_tests.yml b/.github/workflows/debug_tests.yml
index 15cae28c1..8e6d5c94c 100644
--- a/.github/workflows/debug_tests.yml
+++ b/.github/workflows/debug_tests.yml
@@ -1,6 +1,6 @@
 name: Run tests for debugging sigabrt

-on: [pull_request]
+#on: [pull_request]


 jobs:
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 065c9ffd4..79118e52a 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -1,11 +1,11 @@
 name: Documentation

-on:
-  push:
-    branches:
-      - develop
-  pull_request:
-  release:
+#on:
+#  push:
+#    branches:
+#      - develop
+#  pull_request:
+#  release:

 jobs:
   build:
diff --git a/.github/workflows/test_linux_with_coverage.yml b/.github/workflows/test_linux_with_coverage.yml
index 38b5ae19f..a130a870a 100644
--- a/.github/workflows/test_linux_with_coverage.yml
+++ b/.github/workflows/test_linux_with_coverage.yml
@@ -1,6 +1,6 @@
 name: Code coverage

-on: [pull_request]
+#on: [pull_request]

 jobs:
   testing:
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 5b6af38d9..9a92fa651 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -1,6 +1,6 @@
 name: Tests on all platforms with linting

-on: [pull_request]
+#on: [pull_request]

 jobs:
   linting:
diff --git a/pyproject.toml b/pyproject.toml
index 1c8551c0d..8fab0fcf1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ extend-exclude = '''docs/*'''

 [build-system]
-requires = ["setuptools", "numpy", "cython", "pandas", "pyarrow", "wheel"]
+requires = ["setuptools", "numpy", "cython", "pandas", "pyarrow==15.0.0", "wheel"]


 [tool.ruff]
@@ -84,18 +84,20 @@ test-extras = []

 container-engine = "docker"

-manylinux-x86_64-image = "manylinux2014"
-manylinux-aarch64-image = "manylinux2014"
-manylinux-pypy_x86_64-image = "manylinux2014"
-manylinux-pypy_aarch64-image = "manylinux2014"
+manylinux-x86_64-image = "manylinux_2_28"
+manylinux-aarch64-image = "manylinux_2_28"
+manylinux-pypy_x86_64-image = "manylinux_2_28"
+manylinux-pypy_aarch64-image = "manylinux_2_28"

 [tool.cibuildwheel.linux]
 archs = ["auto", "aarch64"]
 build = ["cp310-*", "cp311-*", "cp312-*"]
-repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}"
+repair-wheel-command = [
+    "auditwheel repair -w {dest_dir} {wheel} --exclude libarrow.so.1500 --exclude libarrow_python.so"
+]

 [tool.cibuildwheel.macos]
 #archs = ["x86_64", "universal2", "arm64"]
 archs = ["x86_64"]
 environment = { CC="gcc-12", CXX="g++-12" }
-repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}"
+repair-wheel-command = "delocate-wheel -vv --ignore-missing-dependencies --require-archs {delocate_archs} -w {dest_dir} -v {wheel}"
diff --git a/requirements.txt b/requirements.txt
index f57c2ce8a..876332710 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,5 +8,5 @@ pandas
 pyproj
 rtree
 openmatrix
-pyarrow
+pyarrow==15.0.0
 geopandas

From 28bfe7de1cf1db1620823830ed990058a9e2b679 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Tue, 27 Feb 2024 11:50:28 +1000
Subject: [PATCH 33/33] Re-enable CI

---
 .github/workflows/debug_tests.yml              |  4 ++--
 .github/workflows/documentation.yml            | 12 ++++++------
 .github/workflows/test_linux_with_coverage.yml |  4 ++--
 .github/workflows/unit_tests.yml               |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/debug_tests.yml b/.github/workflows/debug_tests.yml
index 8e6d5c94c..66b00eb85 100644
--- a/.github/workflows/debug_tests.yml
+++ b/.github/workflows/debug_tests.yml
@@ -1,6 +1,6 @@
 name: Run tests for debugging sigabrt

-#on: [pull_request]
+on: [pull_request]


 jobs:
@@ -26,4 +26,4 @@ jobs:
           pip install -e .

       - name: Runs test
-        run: python -m pytest
\ No newline at end of file
+        run: python -m pytest
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 79118e52a..dc8e77a42 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -1,11 +1,11 @@
 name: Documentation

-#on:
-#  push:
-#    branches:
-#      - develop
-#  pull_request:
-#  release:
+on:
+  push:
+    branches:
+      - develop
+  pull_request:
+  release:

 jobs:
   build:
diff --git a/.github/workflows/test_linux_with_coverage.yml b/.github/workflows/test_linux_with_coverage.yml
index a130a870a..93fe16396 100644
--- a/.github/workflows/test_linux_with_coverage.yml
+++ b/.github/workflows/test_linux_with_coverage.yml
@@ -1,6 +1,6 @@
 name: Code coverage

-#on: [pull_request]
+on: [pull_request]

 jobs:
   testing:
@@ -30,4 +30,4 @@ jobs:

       - name: Generate coverage report
         run: |
-          python3 -m pytest --cov=aequilibrae tests/
\ No newline at end of file
+          python3 -m pytest --cov=aequilibrae tests/
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 9a92fa651..b8f65c694 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -1,6 +1,6 @@
 name: Tests on all platforms with linting

-#on: [pull_request]
+on: [pull_request]

 jobs:
   linting:
@@ -66,4 +66,4 @@ jobs:
           python setup.py build_ext --inplace

       - name: Runs test
-        run: python -m pytest
\ No newline at end of file
+        run: python -m pytest