diff --git a/.github/workflows/build_linux.yml b/.github/workflows/build_linux.yml deleted file mode 100644 index e0ac2b110..000000000 --- a/.github/workflows/build_linux.yml +++ /dev/null @@ -1,76 +0,0 @@ -name: Build and upload Linux Python Package (includes QGIS artifacts) - -on: [pull_request, release] - -jobs: - deploy: - runs-on: ubuntu-20.04 - env: - HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} - continue-on-error: true - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.9 - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install packages - run: | - python -m pip install --upgrade pip wheel cffi setuptools twine - - - name: Build manylinux Python wheels - uses: RalfG/python-wheels-manylinux-build@v0.7.1 - with: - python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312' - pip-wheel-args: '--no-deps' - - - name: Moves wheels - run: | - mkdir -p dist - cp -v ./*-manylinux*.whl dist/ - -# - name: update versions to conform with QGIS -# run: | -# cd .github -# python qgis_requirements.py -# cd .. -# -# - name: Build manylinux Python wheels -# uses: RalfG/python-wheels-manylinux-build@v0.7.1 -# with: -# python-versions: 'cp39-cp39' -# pip-wheel-args: '--no-deps' -# -# - name: Moves wheels -# run: | -# mkdir -p dist -# cp -v ./*-manylinux*.whl dist/ - - - name: Stores artifacts along with the workflow result - if: ${{ github.event_name == 'push'}} - uses: actions/upload-artifact@v3 - with: - name: library - path: dist/*.whl - if-no-files-found: error # 'warn' or 'ignore' are also available, defaults to `warn` - - - name: Publish wheels to PyPI - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/*-manylinux*.whl - - - name: Save wheels to AWS - if: ${{ (env.HAS_SECRETS == 'true') }} - uses: jakejarvis/s3-sync-action@master - with: - args: --acl public-read --follow-symlinks - env: - AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_REGION: 'us-east-1' # optional: defaults to us-east-1 - SOURCE_DIR: 'dist/' # optional: defaults to entire repository - DEST_DIR: 'wheels/' # optional: defaults to entire repository diff --git a/.github/workflows/build_mac.yml b/.github/workflows/build_mac.yml deleted file mode 100644 index 401204b28..000000000 --- a/.github/workflows/build_mac.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: Build and upload MacOS Python Package - -on: [pull_request, release] - -jobs: - deploy: - runs-on: macos-latest - env: - # Note that compiler options for macos-latest are documented here: - # https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md - HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} - CC: "gcc-12" - CXX: "g++-12" - continue-on-error: true - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - steps: - - uses: actions/checkout@v4 - - name: Set Python environment - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools wheel twine - pip install -r requirements.txt - pip install -r requirements_additional.txt - - - name: Build - run: | - python setup.py sdist bdist_wheel - - - name: Stores artifacts along with the workflow result - if: ${{ 
github.event_name == 'push'}} - uses: actions/upload-artifact@v3 - with: - name: library - path: dist/*.whl - if-no-files-found: error # 'warn' or 'ignore' are also available, defaults to `warn` - - - name: Publish - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: | - twine upload dist/*.whl - - - name: Publish tar files - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') && (matrix.python-version == '3.11') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: twine upload dist/*.gz diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml new file mode 100644 index 000000000..086c8f1bb --- /dev/null +++ b/.github/workflows/build_wheels.yml @@ -0,0 +1,58 @@ +name: Build + +on: [pull_request] + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v3 + with: + platforms: all + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.5 + + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl + + make_sdist: + name: Make SDist + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Build SDist + run: pipx run build --sdist + + - uses: actions/upload-artifact@v4 + with: + name: cibw-sdist + path: dist/*.tar.gz + + upload_all: + needs: [build_wheels, make_sdist] + environment: pypi + permissions: + id-token: write + runs-on: ubuntu-latest + if: github.event_name == 'release' && github.event.action == 'published' + steps: + - uses: actions/download-artifact@v4 + with: + pattern: cibw-* + path: dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.github/workflows/build_windows.yml b/.github/workflows/build_windows.yml deleted file mode 100644 index 3d4677e74..000000000 --- a/.github/workflows/build_windows.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Build and upload Windows Python Package - -on: [pull_request, release] - -jobs: - deploy: - runs-on: windows-2019 - env: - HAS_SECRETS: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' }} - continue-on-error: true - strategy: - matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] - architecture: ['x64'] - steps: - - uses: actions/checkout@v4 - - name: Set Python environment - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: ${{ matrix.architecture }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools wheel twine - pip install -r requirements.txt - pip install -r requirements_additional.txt - - - name: Build - run: python setup.py bdist_wheel - - - - name: Stores artifacts along with the workflow result -# if: ${{ github.event_name == 'push'}} - uses: actions/upload-artifact@v3 - with: - name: library - path: dist/*.whl - if-no-files-found: error # 'warn' or 'ignore' are also available, defaults to `warn` - - - name: Publish - if: ${{ (github.event_name == 'release') && (env.HAS_SECRETS == 'true') }} - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - run: twine upload 
dist/*.whl diff --git a/.github/workflows/debug_tests.yml b/.github/workflows/debug_tests.yml index 15cae28c1..66b00eb85 100644 --- a/.github/workflows/debug_tests.yml +++ b/.github/workflows/debug_tests.yml @@ -26,4 +26,4 @@ jobs: pip install -e . - name: Runs test - run: python -m pytest \ No newline at end of file + run: python -m pytest diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 065c9ffd4..dc8e77a42 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -1,11 +1,11 @@ name: Documentation on: - push: - branches: - - develop - pull_request: - release: + push: + branches: + - develop + pull_request: + release: jobs: build: diff --git a/.github/workflows/test_linux_with_coverage.yml b/.github/workflows/test_linux_with_coverage.yml index 38b5ae19f..93fe16396 100644 --- a/.github/workflows/test_linux_with_coverage.yml +++ b/.github/workflows/test_linux_with_coverage.yml @@ -30,4 +30,4 @@ jobs: - name: Generate coverage report run: | - python3 -m pytest --cov=aequilibrae tests/ \ No newline at end of file + python3 -m pytest --cov=aequilibrae tests/ diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 5b6af38d9..b8f65c694 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -66,4 +66,4 @@ jobs: python setup.py build_ext --inplace - name: Runs test - run: python -m pytest \ No newline at end of file + run: python -m pytest diff --git a/__version__.py b/__version__.py index f68a650ed..6519541e0 100644 --- a/__version__.py +++ b/__version__.py @@ -1,5 +1,6 @@ +# When updating the version, one must also update the docs/source/_static/switcher.json file version = 1.0 -minor_version = "0" +minor_version = "1" release_name = "Rio de Janeiro" release_version = f"{version}.{minor_version}" diff --git a/aequilibrae/distribution/gravity_application.py b/aequilibrae/distribution/gravity_application.py index 08bf67be1..2d07fae65 100644 --- a/aequilibrae/distribution/gravity_application.py +++ b/aequilibrae/distribution/gravity_application.py @@ -1,5 +1,4 @@ import glob -import importlib.util as iutil import logging import os import tempfile @@ -15,9 +14,6 @@ from aequilibrae.distribution.synthetic_gravity_model import SyntheticGravityModel from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - class GravityApplication: """Applies a synthetic gravity model. 
diff --git a/aequilibrae/distribution/ipf.py b/aequilibrae/distribution/ipf.py index 2378c40a4..5d3d45dcc 100644 --- a/aequilibrae/distribution/ipf.py +++ b/aequilibrae/distribution/ipf.py @@ -1,4 +1,3 @@ -import importlib.util as iutil import os from datetime import datetime from time import perf_counter @@ -12,9 +11,6 @@ from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData from aequilibrae.project.data.matrix_record import MatrixRecord -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - class Ipf: """Iterative proportional fitting procedure diff --git a/aequilibrae/matrix/aequilibrae_matrix.py b/aequilibrae/matrix/aequilibrae_matrix.py index 13fc3c492..18817ee08 100644 --- a/aequilibrae/matrix/aequilibrae_matrix.py +++ b/aequilibrae/matrix/aequilibrae_matrix.py @@ -1,5 +1,4 @@ import functools -import importlib.util as iutil import os import tempfile import uuid @@ -11,13 +10,10 @@ import numpy as np import pandas as pd +import openmatrix as omx from scipy.sparse import coo_matrix # Checks if we can display OMX -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None -if has_omx: - import openmatrix as omx # CONSTANTS VERSION = 1 # VERSION OF THE MATRIX FORMAT @@ -54,9 +50,7 @@ # Offset: 18 + 50*cores + Y*20 | 18 + 50*cores + Y*20 + Y*zones*8 | -matrix_export_types = ["Aequilibrae matrix (*.aem)", "Comma-separated file (*.csv)"] -if has_omx: - matrix_export_types.append("Open matrix (*.omx)") +matrix_export_types = ["Open matrix (*.omx), Aequilibrae matrix (*.aem)", "Comma-separated file (*.csv)"] class AequilibraeMatrix(object): @@ -336,10 +330,6 @@ def robust_name(input_name: str, max_length: int, forbiden_names: List[str]) -> if trial_name not in forbiden_names: return trial_name - if not has_omx: - print("Open Matrix is not installed. Cannot continue") - return - if compressed: raise Warning("Matrix compression not yet supported") @@ -788,10 +778,6 @@ def export(self, output_name: str, cores: List[str] = None): """ fname, file_extension = os.path.splitext(output_name.upper()) - if file_extension == ".OMX": - if not has_omx: - raise ValueError("Open Matrix is not installed. Cannot continue") - if file_extension not in [".AEM", ".CSV", ".OMX"]: raise NotImplementedError(f"File extension {file_extension} not implemented yet") diff --git a/aequilibrae/parameters.yml b/aequilibrae/parameters.yml index d7fe1be3e..9529aa360 100644 --- a/aequilibrae/parameters.yml +++ b/aequilibrae/parameters.yml @@ -42,30 +42,6 @@ network: description: name osm_source: name type: text - - cycleway: - description: cycleway, both way - osm_source: cycleway - type: text - - cycleway_right: - description: cycleway, right - osm_source: cycleway:right - type: text - - cycleway_left: - description: cycleway, left - osm_source: cycleway:left - type: text - - busway: - description: busway - osm_source: busway - type: text - - busway_right: - description: busway, right - osm_source: busway:right - type: text - - busway_left: - description: busway, left - osm_source: busway:left - type: text two-way: - lanes: description: lanes diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index bf307d87a..2a75572c1 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -24,7 +24,7 @@ class GraphBase(ABC): # noqa: B024 with degrees of two. This compresses long streams of links, such as along highways or curved roads, into single links. Dead end removal attempts to remove dead ends and fish spines from the network. 
It does this based on the observation - that in a graph with non-negative weights a dead end will over ever appear in the results of a short(est) path if the + that in a graph with non-negative weights a dead end will only ever appear in the results of a short(est) path if the origin or destination is present within that dead end. Dead end removal is applied before link contraction and does not create a strictly topological equivalent graph, @@ -237,9 +237,9 @@ def _build_directed_graph(self, network: pd.DataFrame, centroids: np.ndarray): if nans: self.logger.warning(f"Field(s) {nans} has(ve) at least one NaN value. Check your computations") - df.loc[:, "b_node"] = df.b_node.values.astype(self.__integer_type) - df.loc[:, "id"] = df.id.values.astype(self.__integer_type) - df.loc[:, "link_id"] = df.link_id.values.astype(self.__integer_type) + df["link_id"] = df["link_id"].astype(self.__integer_type) + df["b_node"] = df.b_node.values.astype(self.__integer_type) + df["id"] = df.id.values.astype(self.__integer_type) df["direction"] = df.direction.values.astype(np.int8) return all_nodes, num_nodes, nodes_to_indices, fs, df diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index 1bb816b1e..4bd13c85e 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -32,8 +32,8 @@ cdef void _remove_dead_ends( # - nodes for which all out going links point to the same node and has the same number of back links. # This is a generalisation of dead-ends formed by a node with a single bidirectional link to another # - # Removal: At nodes of interest we begin a BFS search for links we can remove. It's uncommon to the BFS to extend far as the stopping - # criteria and conditions for expansion are very similar, often this just searches in along a line. + # Removal: At nodes of interest we begin a Breadth First Search (BFS) for links we can remove. It's uncommon for the BFS to extend far as the stopping + # criteria and conditions for expansion are very similar, often this just searches along a line. # For removal the node must have "no choices", that is, all outgoing edges point to the same node *and* all incoming edges can be account for as # from that same node. # The criteria for expansion is that there are no incoming edges *and* all outgoing edges point to the same node. @@ -68,16 +68,16 @@ cdef void _remove_dead_ends( continue ### Propagation - # We now know that he node we are looking at has a mix of incoming and outgoing edges, i.e. in_degree[node] > 0 and out_degree[node] > 0 + # We now know that the node we are looking at has a mix of incoming and outgoing edges, i.e. in_degree[node] > 0 and out_degree[node] > 0 # That implies that this node is reachable from some other node. We now need to assess if this node would ever be considered in pathfinding. - # To be considered there needs to be some form of real choice, i.e. there needs to be multiple ways to leave a new node. If the only way + # To be considered, there needs to be some form of real choice, i.e. there needs to be multiple ways to leave a new node. If the only way # to leave this node is back the way we came then the cost of the path # ... -> pre -> node -> pre -> ... # is greater than that of # ... -> pre -> ... # so we can remove this link to node. This is because negative cost cycles are disallowed in path finding. # - # We don't remove the case were the is only one new node that could be used to leave as it is handled in the graph compression. 
+ # We don't remove the case where there is only one new node that could be used to leave as it is handled in the graph compression. # It would however, be possible to handle that here as well. # If all the outgoing edges are to the same node *and* all incoming edges come from that node, then there is no real choice and the diff --git a/aequilibrae/paths/network_skimming.py b/aequilibrae/paths/network_skimming.py index 9745fa52e..279518e42 100644 --- a/aequilibrae/paths/network_skimming.py +++ b/aequilibrae/paths/network_skimming.py @@ -22,9 +22,6 @@ if pyqt: from PyQt5.QtCore import pyqtSignal -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - sys.dont_write_bytecode = True diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 951a82b2b..bf4671d47 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -6,9 +6,15 @@ from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair from libcpp cimport bool +cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. +cimport pyarrow as pa +cimport pyarrow.lib as libpa +cimport pyarrow._dataset_parquet as pq +from libcpp.memory cimport shared_ptr + # std::linear_congruential_engine is not available in the Cython libcpp.random shim. We'll import it ourselves # from libcpp.random cimport minstd_rand -from libc.stdint cimport uint_fast32_t, uint_fast64_t +from libc.stdint cimport * cdef extern from "" namespace "std" nogil: cdef cppclass random_device: @@ -103,6 +109,21 @@ ctypedef unordered_set[vector[long long] *, OrderedVectorPointerHasher, PointerD ctypedef unordered_set[unordered_set[long long] *, UnorderedSetPointerHasher, PointerDereferenceEqualTo[unordered_set[long long] *]] LinkSet_t ctypedef vector[pair[unordered_set[long long] *, vector[long long] *]] RouteMap_t +# Pyarrow's Cython API does not provide all the functions available in the C++ API, some of them are really useful. 
+# Here we redeclare the classes with the functions we want, these are available in the current namespace, *not* libarrow +cdef extern from "arrow/builder.h" namespace "arrow" nogil: + + cdef cppclass CUInt32Builder" arrow::UInt32Builder"(libpa.CArrayBuilder): + CUInt32Builder(libpa.CMemoryPool* pool) + libpa.CStatus Append(const uint32_t value) + libpa.CStatus AppendValues(const vector[uint32_t] &values) + libpa.CStatus AppendValues(vector[uint32_t].const_reverse_iterator values_begin, vector[uint32_t].const_reverse_iterator values_end) + + cdef cppclass CDoubleBuilder" arrow::DoubleBuilder"(libpa.CArrayBuilder): + CDoubleBuilder(libpa.CMemoryPool* pool) + libpa.CStatus Append(const double value) + libpa.CStatus AppendValues(const vector[double] &values) + cdef class RouteChoiceSet: cdef: @@ -130,7 +151,21 @@ cdef class RouteChoiceSet: long long [:] thread_reached_first ) noexcept nogil - cdef RouteSet_t *generate_route_set( + cdef RouteSet_t *bfsle( + RouteChoiceSet self, + long origin_index, + long dest_index, + unsigned int max_routes, + unsigned int max_depth, + double [:] thread_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + long long [:] thread_b_nodes, + long long [:] _thread_reached_first, + unsigned int seed + ) noexcept nogil + + cdef RouteSet_t *link_penalisation( RouteChoiceSet self, long origin_index, long dest_index, @@ -141,5 +176,43 @@ cdef class RouteChoiceSet: long long [:] thread_conn, long long [:] thread_b_nodes, long long [:] _thread_reached_first, + double penatly, unsigned int seed ) noexcept nogil + + @staticmethod + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil + + @staticmethod + cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil + + @staticmethod + cdef vector[double] *compute_gamma( + RouteSet_t *route_set, + pair[vector[long long] *, vector[long long] *] &freq_set, + vector[double] &total_cost, + double[:] cost_view + ) noexcept nogil + + @staticmethod + cdef vector[double] *compute_prob( + vector[double] &total_cost, + vector[double] &gamma_vec, + double beta, + double theta + ) noexcept nogil + + @staticmethod + cdef shared_ptr[libpa.CTable] make_table_from_results( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &route_sets, + vector[vector[double] *] *cost_set, + vector[vector[double] *] *gamma_set, + vector[vector[double] *] *prob_set + ) + +cdef class Checkpoint: + cdef: + public object where + public object schema + public object partition_cols diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index dfcb09ec6..b22dd0c08 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -54,18 +54,36 @@ routes aren't required small-ish things like the memcpy and banned link set copy from aequilibrae import Graph -from libc.math cimport INFINITY +from libc.math cimport INFINITY, pow, exp from libc.string cimport memcpy from libc.limits cimport UINT_MAX +from libc.stdlib cimport abort +from libcpp cimport nullptr from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair -from cython.operator cimport dereference as deref +from libcpp.algorithm cimport sort, lower_bound +from cython.operator cimport dereference as deref, preincrement as inc from cython.parallel cimport parallel, prange, threadid +cimport openmp import 
numpy as np +import pyarrow as pa from typing import List, Tuple +import itertools +import pathlib +import logging +import warnings + +cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. +cimport pyarrow as pa +cimport pyarrow.lib as libpa +import pyarrow.dataset +import pyarrow.parquet as pq +from libcpp.memory cimport shared_ptr + +from libc.stdio cimport fprintf, printf, stderr # It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' @@ -77,6 +95,23 @@ cdef class RouteChoiceSet: Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data' """ + route_set_dtype = pa.list_(pa.uint32()) + + schema = pa.schema([ + pa.field("origin id", pa.uint32(), nullable=False), + pa.field("destination id", pa.uint32(), nullable=False), + pa.field("route set", route_set_dtype, nullable=False), + ]) + + psl_schema = pa.schema([ + pa.field("origin id", pa.uint32(), nullable=False), + pa.field("destination id", pa.uint32(), nullable=False), + pa.field("route set", route_set_dtype, nullable=False), + pa.field("cost", pa.float64(), nullable=False), + pa.field("gamma", pa.float64(), nullable=False), + pa.field("probability", pa.float64(), nullable=False), + ]) + def __cinit__(self): """C level init. For C memory allocation and initialisation. Called exactly once per object.""" pass @@ -88,14 +123,17 @@ cdef class RouteChoiceSet: self.graph_fs_view = graph.compact_fs self.b_nodes_view = graph.compact_graph.b_node.values self.nodes_to_indices_view = graph.compact_nodes_to_indices - tmp = graph.lonlat_index.loc[graph.compact_all_nodes] - self.lat_view = tmp.lat.values - self.lon_view = tmp.lon.values + + # tmp = graph.lonlat_index.loc[graph.compact_all_nodes] + # self.lat_view = tmp.lat.values + # self.lon_view = tmp.lon.values + self.a_star = False + self.ids_graph_view = graph.compact_graph.id.values self.num_nodes = graph.compact_num_nodes self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - self.a_star = True + def __dealloc__(self): """ @@ -105,34 +143,45 @@ cdef class RouteChoiceSet: pass @cython.embedsignature(True) - def run(self, origin: int, destination: int, max_routes: int = 0, max_depth: int = 0, seed: int = 0, a_star: bool = True): + def run(self, origin: int, destination: int, *args, **kwargs): """ Compute the a route set for a single OD pair. Often the returned list's length is ``max_routes``, however, it may be limited by ``max_depth`` or if all unique possible paths have been found then a smaller set will be returned. - Thin wrapper around ``RouteChoiceSet.batched``. + Thin wrapper around ``RouteChoiceSet.batched``. Additional arguments are forwarded to ``RouteChoiceSet.batched``. :Arguments: **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a centroid. **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to choose a centroid. - **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. - **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited. - **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. :Returns: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. 
Represents paths from ``origin`` to ``destination``. """ - return self.batched([(origin, destination)], max_routes=max_routes, max_depth=max_depth, seed=seed, a_star=a_star)[(origin, destination)] + return [tuple(x) for x in self.batched([(origin, destination)], *args, **kwargs).column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @cython.wraparound(False) @cython.embedsignature(True) @cython.initializedcheck(False) - def batched(self, ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, seed: int = 0, cores: int = 1, a_star: bool = True): + def batched( + self, + ods: List[Tuple[int, int]], + max_routes: int = 0, + max_depth: int = 0, + seed: int = 0, + cores: int = 0, + a_star: bool = True, + bfsle: bool = True, + penalty: float = 0.0, + where: Optional[str] = None, + path_size_logit: bool = False, + beta: float = 1.0, + theta: float = 1.0, + ): """ Compute the a route set for a list of OD pairs. @@ -143,43 +192,35 @@ cdef class RouteChoiceSet: **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination node ID must be present within compact graph. Recommended to choose a centroids. **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. - **max_depth** (:obj:`int`): Maximum depth BFS can explore. Must be non-negative. Default of ``0`` for unlimited. + **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. + Must be non-negative. Default of ``0`` for unlimited. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. - **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``1``. + **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``0`` for all available. + **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. + **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. + **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. :Returns: **route sets** (:obj:`dict[tuple[int, int], list[tuple[int, ...]]]`): Returns a list of unique tuples of compact link IDs for - each OD pair provided (as keys). Represents paths from - ``origin`` to ``destination``. + each OD pair provided (as keys). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. """ cdef: - vector[pair[long long, long long]] c_ods = ods - - # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory - double [:, :] cost_matrix = np.empty((cores, self.cost_view.shape[0]), dtype=float) - long long [:, :] predecessors_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) - long long [:, :] conn_matrix = np.empty((cores, self.num_nodes + 1), dtype=np.int64) - long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (cores, self.b_nodes_view.shape[0])).copy() - - # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the - # interface. 
- long long [:, :] _reached_first_matrix - - vector[RouteSet_t *] *results = new vector[RouteSet_t *](len(ods)) - long long origin_index, dest_index, i + long long o, d if max_routes == 0 and max_depth == 0: raise ValueError("Either `max_routes` or `max_depth` must be > 0") - if max_routes < 0 or max_depth < 0 or cores < 0: + if max_routes < 0 or max_depth < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - cdef: - unsigned int c_max_routes = max_routes - unsigned int c_max_depth = max_depth - unsigned int c_seed = seed - unsigned int c_cores = cores - long long o, d + if penalty != 0.0 and bfsle: + raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") + + if not bfsle and penalty <= 1.0: + raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") + + if path_size_logit and (beta < 0 or theta <= 0): + raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") for o, d in ods: if self.nodes_to_indices_view[o] == -1: @@ -187,63 +228,180 @@ cdef class RouteChoiceSet: if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") - self.a_star = a_star + cdef: + long long origin_index, dest_index, i + unsigned int c_max_routes = max_routes + unsigned int c_max_depth = max_depth + unsigned int c_seed = seed + unsigned int c_cores = cores if cores > 0 else openmp.omp_get_num_threads() + + vector[pair[long long, long long]] c_ods + + # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory + double [:, :] cost_matrix = np.empty((c_cores, self.cost_view.shape[0]), dtype=float) + long long [:, :] predecessors_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64) + long long [:, :] conn_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64) + long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (c_cores, self.b_nodes_view.shape[0])).copy() + + # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the + # interface. 
+            long long [:, :] _reached_first_matrix
+
+            vector[RouteSet_t *] *results
+            size_t max_results_len, batch_len, j
+
+        # self.a_star = a_star
         if self.a_star:
-            _reached_first_matrix = np.zeros((cores, 1), dtype=np.int64)  # Dummy array to allow slicing
+            _reached_first_matrix = np.zeros((c_cores, 1), dtype=np.int64)  # Dummy array to allow slicing
+        else:
+            _reached_first_matrix = np.zeros((c_cores, self.num_nodes + 1), dtype=np.int64)
+
+        set_ods = set(ods)
+        if len(set_ods) != len(ods):
+            warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs")
+
+        if where is not None:
+            checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"])
+            batches = list(Checkpoint.batches(list(set_ods)))
+            max_results_len = max(len(batch) for batch in batches)
+        else:
+            batches = [list(set_ods)]
+            max_results_len = len(set_ods)
+
+        results = new vector[RouteSet_t *](max_results_len)
+
+        cdef:
+            RouteSet_t *route_set
+            pair[vector[long long] *, vector[long long] *] freq_pair
+            vector[long long] *link_union = nullptr
+            vector[vector[double] *] *cost_set = nullptr
+            vector[vector[double] *] *gamma_set = nullptr
+            vector[vector[double] *] *prob_set = nullptr
+
+        if path_size_logit:
+            cost_set = new vector[vector[double] *](max_results_len)
+            gamma_set = new vector[vector[double] *](max_results_len)
+            prob_set = new vector[vector[double] *](max_results_len)
+
+        for batch in batches:
+            c_ods = batch  # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer
+            batch_len = c_ods.size()
+            results.resize(batch_len)  # We know we've allocated enough space for the largest batch, but we resize to a smaller size when not needed
+
+            if path_size_logit:
+                cost_set.clear()
+                gamma_set.clear()
+                prob_set.clear()
+
+                cost_set.resize(batch_len)
+                gamma_set.resize(batch_len)
+                prob_set.resize(batch_len)
+
+            with nogil, parallel(num_threads=c_cores):
+                # The link union needs to be allocated per thread as scratch space; as it's of unknown length, we can't allocate a matrix of them.
+ # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + if path_size_logit: + link_union = new vector[long long]() + + for i in prange(batch_len): + origin_index = self.nodes_to_indices_view[c_ods[i].first] + dest_index = self.nodes_to_indices_view[c_ods[i].second] + + if self.block_flows_through_centroids: + blocking_centroid_flows( + 0, # Always blocking + origin_index, + self.zones, + self.graph_fs_view, + b_nodes_matrix[threadid()], + self.b_nodes_view, + ) + + if bfsle: + route_set = RouteChoiceSet.bfsle( + self, + origin_index, + dest_index, + c_max_routes, + c_max_depth, + cost_matrix[threadid()], + predecessors_matrix[threadid()], + conn_matrix[threadid()], + b_nodes_matrix[threadid()], + _reached_first_matrix[threadid()], + c_seed, + ) + else: + route_set = RouteChoiceSet.link_penalisation( + self, + origin_index, + dest_index, + c_max_routes, + c_max_depth, + cost_matrix[threadid()], + predecessors_matrix[threadid()], + conn_matrix[threadid()], + b_nodes_matrix[threadid()], + _reached_first_matrix[threadid()], + penalty, + c_seed, + ) + + if path_size_logit: + link_union.clear() + freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union)) + deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) + deref(gamma_set)[i] = RouteChoiceSet.compute_gamma(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) + deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(gamma_set)[i]), beta, theta) + del freq_pair.first + del freq_pair.second + + deref(results)[i] = route_set + + if self.block_flows_through_centroids: + blocking_centroid_flows( + 1, # Always unblocking + origin_index, + self.zones, + self.graph_fs_view, + b_nodes_matrix[threadid()], + self.b_nodes_view, + ) + + if path_size_logit: + del link_union + + table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + + # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures + if path_size_logit: + for j in range(batch_len): + del deref(cost_set)[j] + del deref(gamma_set)[j] + del deref(prob_set)[j] + + for j in range(batch_len): + for route in deref(deref(results)[j]): + del route + + if where is not None: + checkpoint.write(table) + del table + else: + break # There was only one batch anyway + + # We're done with everything now, we can free the outer internal structures + if path_size_logit: + del cost_set + del gamma_set + del prob_set + del results + + if where is None: + return table else: - _reached_first_matrix = np.zeros((cores, self.num_nodes), dtype=np.int64) - - with nogil, parallel(num_threads=c_cores): - for i in prange(c_ods.size()): - origin_index = self.nodes_to_indices_view[c_ods[i].first] - dest_index = self.nodes_to_indices_view[c_ods[i].second] - - if self.block_flows_through_centroids: - blocking_centroid_flows( - 0, # Always blocking - origin_index, - self.zones, - self.graph_fs_view, - b_nodes_matrix[threadid()], - self.b_nodes_view, - ) - - deref(results)[i] = RouteChoiceSet.generate_route_set( - self, - origin_index, - dest_index, - c_max_routes, - c_max_depth, - cost_matrix[threadid()], - predecessors_matrix[threadid()], - conn_matrix[threadid()], - b_nodes_matrix[threadid()], - _reached_first_matrix[threadid()], - c_seed, - ) - - if self.block_flows_through_centroids: - blocking_centroid_flows( - 1, # Always unblocking - 
origin_index, - self.zones, - self.graph_fs_view, - b_nodes_matrix[threadid()], - self.b_nodes_view, - ) - - # Build results into python objects using Cythons auto-conversion from vector[long long] to list then to tuple (for set operations) - res = [] - for result in deref(results): - links = [] - for route in deref(result): - links.append(tuple(deref(route))) - del route - res.append(links) - - del results - return dict(zip(ods, res)) + return @cython.initializedcheck(False) cdef void path_find( @@ -289,7 +447,7 @@ cdef class RouteChoiceSet: @cython.wraparound(False) @cython.embedsignature(True) @cython.initializedcheck(False) - cdef RouteSet_t *generate_route_set( + cdef RouteSet_t *bfsle( RouteChoiceSet self, long origin_index, long dest_index, @@ -385,3 +543,329 @@ cdef class RouteChoiceSet: del banned return route_set + + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + cdef RouteSet_t *link_penalisation( + RouteChoiceSet self, + long origin_index, + long dest_index, + unsigned int max_routes, + unsigned int max_depth, + double [:] thread_cost, + long long [:] thread_predecessors, + long long [:] thread_conn, + long long [:] thread_b_nodes, + long long [:] _thread_reached_first, + double penatly, + unsigned int seed + ) noexcept nogil: + cdef: + RouteSet_t *route_set + + # Scratch objects + vector[long long] *vec + long long p, connector + + max_routes = max_routes if max_routes != 0 else UINT_MAX + max_depth = max_depth if max_depth != 0 else UINT_MAX + route_set = new RouteSet_t() + memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) + + for depth in range(max_depth): + if route_set.size() >= max_routes: + break + + RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) + + if thread_predecessors[dest_index] >= 0: + vec = new vector[long long]() + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + p = dest_index + while p != origin_index: + connector = thread_conn[p] + p = thread_predecessors[p] + vec.push_back(connector) + + for connector in deref(vec): + thread_cost[connector] *= penatly + + route_set.insert(vec) + else: + break + + return route_set + + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + @staticmethod + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil: + cdef: + vector[long long] *keys + vector[long long] *counts + + # Scratch objects + size_t length, count + long long link, i + + link_union.clear() + + keys = new vector[long long]() + counts = new vector[long long]() + + length = 0 + for route in deref(route_set): + length = length + route.size() + link_union.reserve(length) + + for route in deref(route_set): + link_union.insert(link_union.end(), route.begin(), route.end()) + + sort(link_union.begin(), link_union.end()) + + union_iter = link_union.begin() + while union_iter != link_union.end(): + count = 0 + link = deref(union_iter) + while link == deref(union_iter): + count = count + 1 + inc(union_iter) + + keys.push_back(link) + counts.push_back(count) + + return make_pair(keys, counts) + + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + @staticmethod + cdef vector[double] 
*compute_cost(RouteSet_t *route_set, double[:] cost_view) noexcept nogil: + cdef: + vector[double] *cost_vec + + # Scratch objects + double cost + long long link, i + + cost_vec = new vector[double]() + cost_vec.reserve(route_set.size()) + + for route in deref(route_set): + cost = 0.0 + for link in deref(route): + cost = cost + cost_view[link] + + cost_vec.push_back(cost) + + return cost_vec + + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + @staticmethod + cdef vector[double] *compute_gamma( + RouteSet_t *route_set, + pair[vector[long long] *, vector[long long] *] &freq_set, + vector[double] &total_cost, + double[:] cost_view + ) noexcept nogil: + """ + Notation changes: + i: j + a: link + t_a: cost_view + c_i: total_costs + A_i: route + sum_{k in R}: delta_{a,k}: freq_set + """ + cdef: + vector[double] *gamma_vec + + # Scratch objects + vector[long long].const_iterator link_iter + double gamma + long long link, j + size_t i + + gamma_vec = new vector[double]() + gamma_vec.reserve(route_set.size()) + + j = 0 + for route in deref(route_set): + gamma = 0.0 + for link in deref(route): + # We know the frequency table is ordered and contains every link in the union of the routes. + # We want to find the index of the link, and use that to look up it's frequency + link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) + + gamma = gamma + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + + gamma_vec.push_back(gamma / total_cost[j]) + + j = j + 1 + + return gamma_vec + + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + @staticmethod + cdef vector[double] *compute_prob( + vector[double] &total_cost, + vector[double] &gamma_vec, + double beta, + double theta + ) noexcept nogil: + cdef: + # Scratch objects + vector[double] *prob_vec + double inv_prob + long long route_set_idx + size_t i, j + + prob_vec = new vector[double]() + prob_vec.reserve(total_cost.size()) + + # Beware when refactoring the below, the scale of the costs may cause floating point errors. Large costs will lead to NaN results + for i in range(total_cost.size()): + inv_prob = 0.0 + for j in range(total_cost.size()): + inv_prob = inv_prob + pow(gamma_vec[j] / gamma_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) + + prob_vec.push_back(1.0 / inv_prob) + + return prob_vec + + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.boundscheck(False) + @cython.initializedcheck(False) + @staticmethod + cdef shared_ptr[libpa.CTable] make_table_from_results( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &route_sets, + vector[vector[double] *] *cost_set, + vector[vector[double] *] *gamma_set, + vector[vector[double] *] *prob_set + ): + cdef: + shared_ptr[libpa.CArray] paths + shared_ptr[libpa.CArray] offsets + + libpa.CMemoryPool *pool = libpa.c_get_memory_pool() + + # Custom imports, these are declared in route_choice.pxd *not* libarrow. 
+ CUInt32Builder *path_builder = new CUInt32Builder(pool) + CDoubleBuilder *cost_col = nullptr + CDoubleBuilder *gamma_col = nullptr + CDoubleBuilder *prob_col = nullptr + + libpa.CInt32Builder *offset_builder = new libpa.CInt32Builder(pool) # Must be Int32 *not* UInt32 + libpa.CUInt32Builder *o_col = new libpa.CUInt32Builder(pool) + libpa.CUInt32Builder *d_col = new libpa.CUInt32Builder(pool) + vector[shared_ptr[libpa.CArray]] columns + shared_ptr[libpa.CDataType] route_set_dtype = libpa.pyarrow_unwrap_data_type(RouteChoiceSet.route_set_dtype) + + libpa.CResult[shared_ptr[libpa.CArray]] route_set_results + + int offset = 0 + bint psl = (cost_set != nullptr and gamma_set != nullptr and prob_set != nullptr) + + # Origins, Destination, Route set, [Cost for route, Gamma for route, Probability for route] + columns.resize(6 if psl else 3) + + if psl: + cost_col = new CDoubleBuilder(pool) + gamma_col = new CDoubleBuilder(pool) + prob_col = new CDoubleBuilder(pool) + + for i in range(ods.size()): + cost_col.AppendValues(deref(deref(cost_set)[i])) + gamma_col.AppendValues(deref(deref(gamma_set)[i])) + prob_col.AppendValues(deref(deref(prob_set)[i])) + + for i in range(ods.size()): + route_set = route_sets[i] + + # Instead of construction a "list of lists" style object for storing the route sets we instead will construct one big array of link ids + # with a corresponding offsets array that indicates where each new row (path) starts. + for route in deref(route_set): + o_col.Append(ods[i].first) + d_col.Append(ods[i].second) + + offset_builder.Append(offset) + path_builder.AppendValues(route.crbegin(), route.crend()) + + offset += route.size() + + path_builder.Finish(&paths) + + offset_builder.Append(offset) # Mark the end of the array in offsets + offset_builder.Finish(&offsets) + + route_set_results = libpa.CListArray.FromArraysAndType(route_set_dtype, deref(offsets.get()), deref(paths.get()), pool, shared_ptr[libpa.CBuffer]()) + + o_col.Finish(&columns[0]) + d_col.Finish(&columns[1]) + columns[2] = deref(route_set_results) + + if psl: + cost_col.Finish(&columns[3]) + gamma_col.Finish(&columns[4]) + prob_col.Finish(&columns[5]) + + cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema(RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema) + cdef shared_ptr[libpa.CTable] table = libpa.CTable.MakeFromArrays(schema, columns) + + del path_builder + del offset_builder + del o_col + del d_col + + if psl: + del cost_col + del gamma_col + del prob_col + + return table + + +@cython.embedsignature(True) +cdef class Checkpoint: + """ + A small wrapper class to write a dataset partition by partition + """ + + def __init__(self, where, schema, partition_cols = None): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + self.where = pathlib.Path(where) + self.schema = schema + self.partition_cols = partition_cols + + def write(self, table): + logger = logging.getLogger("aequilibrae") + pq.write_to_dataset( + table, + self.where, + partitioning=self.partition_cols, + partitioning_flavor="hive", + schema=self.schema, + use_threads=True, + existing_data_behavior="overwrite_or_ignore", + file_visitor=lambda written_file: logger.info(f"Wrote partition dataset at {written_file.path}") + ) + + def read_dataset(self): + return pa.dataset.dataset(self.where, format="parquet", partitioning=pa.dataset.HivePartitioning(self.schema)) + + @staticmethod + def batches(ods: List[Tuple[int, int]]): + return (list(g) for k, g in itertools.groupby(sorted(ods), 
key=lambda x: x[0])) diff --git a/aequilibrae/paths/traffic_assignment.py b/aequilibrae/paths/traffic_assignment.py index a83b120d7..f139873d5 100644 --- a/aequilibrae/paths/traffic_assignment.py +++ b/aequilibrae/paths/traffic_assignment.py @@ -1,4 +1,3 @@ -import importlib.util as iutil import logging import socket import sqlite3 @@ -23,9 +22,6 @@ from aequilibrae.paths.vdf import VDF, all_vdf_functions from aequilibrae.project.database_connection import database_connection -spec = iutil.find_spec("openmatrix") -has_omx = spec is not None - class AssignmentBase(ABC): def __init__(self, project=None): @@ -669,7 +665,6 @@ def save_skims(self, matrix_name: str, which_ones="final", format="omx", project mat_format = format.lower() if mat_format not in ["omx", "aem"]: raise ValueError("Matrix needs to be either OMX or native AequilibraE") - if mat_format == "omx" and not has_omx: raise ImportError("OpenMatrix is not available on your system") if not project: diff --git a/aequilibrae/project/network/link_types.py b/aequilibrae/project/network/link_types.py index 05b3524ae..df3f91140 100644 --- a/aequilibrae/project/network/link_types.py +++ b/aequilibrae/project/network/link_types.py @@ -1,6 +1,7 @@ -from sqlite3 import IntegrityError, Connection -from aequilibrae.project.network.link_type import LinkType +from sqlite3 import IntegrityError + from aequilibrae.project.field_editor import FieldEditor +from aequilibrae.project.network.link_type import LinkType from aequilibrae.project.table_loader import TableLoader from aequilibrae.utils.db_utils import commit_and_close from aequilibrae.utils.spatialite_utils import connect_spatialite @@ -84,7 +85,6 @@ def new(self, link_type_id: str) -> LinkType: tp["link_type_id"] = link_type_id lt = LinkType(tp, self.project) self.__items[link_type_id] = lt - self.logger.warning("Link type has not yet been saved to the database. Do so explicitly") return lt def delete(self, link_type_id: str) -> None: diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index b9d5f8115..68c0a45fa 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -126,6 +126,7 @@ def create_from_osm( model_area: Optional[Polygon] = None, place_name: Optional[str] = None, modes=("car", "transit", "bicycle", "walk"), + clean=True, ) -> None: """ Downloads the network from Open-Street Maps @@ -141,6 +142,9 @@ def create_from_osm( **modes** (:obj:`tuple`, Optional): List of all modes to be downloaded. Defaults to the modes in the parameter file + **clean** (:obj:`bool`, Optional): Keeps only the links that intersects the model area polygon. Defaults to + True. Does not apply to networks downloaded with a place name + .. code-block:: python >>> from aequilibrae import Project @@ -191,6 +195,7 @@ def create_from_osm( raise ValueError("Coordinates out of bounds. 
Polygon must be in WGS84") west, south, east, north = model_area.bounds else: + clean = False bbox, report = placegetter(place_name) if bbox is None: msg = f'We could not find a reference for place name "{place_name}"' @@ -229,14 +234,14 @@ def create_from_osm( if subarea.intersects(model_area): polygons.append(subarea) self.logger.info("Downloading data") - self.downloader = OSMDownloader(polygons, modes, logger=self.logger) + dwnloader = OSMDownloader(polygons, modes, logger=self.logger) if pyqt: - self.downloader.downloading.connect(self.signal_handler) + dwnloader.downloading.connect(self.signal_handler) - self.downloader.doWork() + dwnloader.doWork() self.logger.info("Building Network") - self.builder = OSMBuilder(self.downloader.json, project=self.project, model_area=model_area) + self.builder = OSMBuilder(dwnloader.data, project=self.project, model_area=model_area, clean=clean) if pyqt: self.builder.building.connect(self.signal_handler) diff --git a/aequilibrae/project/network/osm/model_area_gridding.py b/aequilibrae/project/network/osm/model_area_gridding.py new file mode 100644 index 000000000..bcaa1a6a3 --- /dev/null +++ b/aequilibrae/project/network/osm/model_area_gridding.py @@ -0,0 +1,26 @@ +# Inspired by https://www.matecdev.com/posts/shapely-polygon-gridding.html +from math import ceil + +import geopandas as gpd +import numpy as np +from shapely.geometry import Polygon + + +def geometry_grid(model_area, srid) -> gpd.GeoDataFrame: + minx, miny, maxx, maxy = model_area.bounds + # Some rough heuristic to get the number of points per sub-polygon in the 2 digits range + subd = ceil((len(model_area.exterior.coords) / 32) ** 0.5) + dx = (maxx - minx) / subd + dy = (maxy - miny) / subd + elements = [] + x1 = minx + for i in range(subd): + j1 = miny + for j in range(subd): + elements.append(Polygon([[x1, j1], [x1, j1 + dy], [x1 + dx, j1 + dy], [x1 + dx, j1]])) + j1 += dy + x1 += dx + + gdf = gpd.GeoDataFrame({"id": np.arange(len(elements))}, geometry=elements, crs=srid) + + return gdf.clip(model_area) diff --git a/aequilibrae/project/network/osm/osm_builder.py b/aequilibrae/project/network/osm/osm_builder.py index ecc10c222..af14827cb 100644 --- a/aequilibrae/project/network/osm/osm_builder.py +++ b/aequilibrae/project/network/osm/osm_builder.py @@ -1,50 +1,55 @@ +import geopandas as gpd import gc import importlib.util as iutil import string -from typing import List +from math import floor +from pathlib import Path +from typing import List, Tuple import numpy as np import pandas as pd -from shapely import Point +from pandas import json_normalize from shapely.geometry import Polygon from aequilibrae.context import get_active_project from aequilibrae.parameters import Parameters -from aequilibrae.project.network.haversine import haversine -from aequilibrae.project.network.link_types import LinkTypes +from aequilibrae.project.project_creation import remove_triggers, add_triggers from aequilibrae.utils import WorkerThread -from aequilibrae.utils.db_utils import commit_and_close +from aequilibrae.utils.db_utils import commit_and_close, read_and_close, list_columns from aequilibrae.utils.spatialite_utils import connect_spatialite +from .model_area_gridding import geometry_grid pyqt = iutil.find_spec("PyQt5") is not None if pyqt: from PyQt5.QtCore import pyqtSignal -if iutil.find_spec("qgis") is not None: - pass - class OSMBuilder(WorkerThread): if pyqt: building = pyqtSignal(object) - def __init__(self, osm_items: List, project, model_area: Polygon) -> None: + def __init__(self, data, 
project, model_area: Polygon, clean: bool) -> None: WorkerThread.__init__(self, None) + + project.logger.info("Preparing OSM builder") + self.__emit_all(["text", "Preparing OSM builder"]) + self.project = project or get_active_project() self.logger = self.project.logger - self.osm_items = osm_items - self.model_area = model_area + self.model_area = geometry_grid(model_area, 4326) self.path = self.project.path_to_file self.node_start = 10000 - self.__link_types = None # type: LinkTypes + self.clean = clean self.report = [] - self.__model_link_types = [] - self.__model_link_type_ids = [] - self.__link_type_quick_reference = {} - self.nodes = {} - self.node_df = [] - self.links = {} - self.insert_qry = """INSERT INTO {} ({}, geometry) VALUES({}, GeomFromText(?, 4326))""" + self.__all_ltp = pd.DataFrame([]) + self.__link_id = 1 + self.__valid_links = [] + + # Building shapely geometries makes the code surprisingly slower. + self.node_df = data["nodes"] + self.node_df.loc[:, "node_id"] = np.arange(self.node_start, self.node_start + self.node_df.shape[0]) + gc.collect() + self.links_df = data["links"] def __emit_all(self, *args): if pyqt: @@ -52,212 +57,183 @@ def __emit_all(self, *args): def doWork(self): with commit_and_close(connect_spatialite(self.path)) as conn: - self.__worksetup() - node_count = self.data_structures() - self.importing_links(node_count, conn) + self.__update_table_structure(conn) + self.importing_network(conn) + + self.logger.info("Cleaning things up") conn.execute( "DELETE FROM nodes WHERE node_id NOT IN (SELECT a_node FROM links union all SELECT b_node FROM links)" ) + conn.commit() + self.__do_clean(conn) + self.__emit_all(["finished_threaded_procedure", 0]) - def data_structures(self): - self.logger.info("Separating nodes and links") - self.__emit_all(["text", "Separating nodes and links"]) - self.__emit_all(["maxValue", len(self.osm_items)]) - - alinks = [] - n = [] - tot_items = len(self.osm_items) - # When downloading data for entire countries, memory consumption can be quite intensive - # So we get rid of everything we don't need - for i in range(tot_items, 0, -1): - item = self.osm_items.pop(-1) - if item["type"] == "way": - alinks.append(item) - elif item["type"] == "node": - n.append(item) - self.__emit_all(["Value", tot_items - i]) - gc.collect() + def importing_network(self, conn): + self.logger.info("Importing the network") + node_count = pd.DataFrame(self.links_df["nodes"].explode("nodes")).assign(counter=1).groupby("nodes").count() - self.logger.info("Setting data structures for nodes") - self.__emit_all(["text", "Setting data structures for nodes"]) - self.__emit_all(["maxValue", len(n)]) - - self.node_df = [] - for i, node in enumerate(n): - nid = node.pop("id") - _ = node.pop("type") - node["node_id"] = i + self.node_start - node["inside_model"] = self.model_area.contains(Point(node["lon"], node["lat"])) - self.nodes[nid] = node - self.node_df.append([node["node_id"], nid, node["lon"], node["lat"]]) - self.__emit_all(["Value", i]) - del n - self.node_df = ( - pd.DataFrame(self.node_df, columns=["A", "B", "C", "D"]) - .drop_duplicates(subset=["C", "D"]) - .to_records(index=False) - ) - - self.logger.info("Setting data structures for links") - self.__emit_all(["text", "Setting data structures for links"]) - self.__emit_all(["maxValue", len(alinks)]) - - all_nodes = [] - for i, link in enumerate(alinks): - osm_id = link.pop("id") - _ = link.pop("type") - all_nodes.extend(link["nodes"]) - self.links[osm_id] = link - self.__emit_all(["Value", i]) - del alinks 
- - self.logger.info("Finalizing data structures") - self.__emit_all(["text", "Finalizing data structures"]) - - node_count = self.unique_count(np.array(all_nodes)) - - return node_count - - def importing_links(self, node_count, conn): - node_ids = {} - - vars = {} - vars["link_id"] = 1 - table = "links" - fields = self.get_link_fields() - self.__update_table_structure(conn) - field_names = ",".join(fields) - - self.logger.info("Adding network nodes") - self.__emit_all(["text", "Adding network nodes"]) - sql = "insert into nodes(node_id, is_centroid, osm_id, geometry) Values(?, 0, ?, MakePoint(?,?, 4326))" - conn.executemany(sql, self.node_df) - conn.commit() - del self.node_df + self.node_df.osm_id = self.node_df.osm_id.astype(np.int64) + self.node_df.set_index(["osm_id"], inplace=True) + + self.__process_link_chunk() + shape_ = self.links_df.shape[0] + message_step = max(1, floor(shape_ / 100)) + self.__emit_all(["maxValue", shape_]) - self.logger.info("Adding network links") + self.logger.info("Geo-procesing links") self.__emit_all(["text", "Adding network links"]) - L = len(list(self.links.keys())) - self.__emit_all(["maxValue", L]) - - counter = 0 - mode_codes, not_found_tags = self.modes_per_link_type(conn) - owf, twf = self.field_osm_source() - all_attrs = [] - all_osm_ids = list(self.links.keys()) - for osm_id in all_osm_ids: - link = self.links.pop(osm_id) + geometries = [] + self.links_df.set_index(["osm_id"], inplace=True) + for counter, (idx, link) in enumerate(self.links_df.iterrows()): self.__emit_all(["Value", counter]) - counter += 1 - if counter % 1000 == 0: - self.logger.info(f"Creating segments from {counter:,} out of {L:,} OSM link objects") - vars["osm_id"] = osm_id - vars["link_type"] = "default" - linknodes = link["nodes"] - linktags = link["tags"] - - indices = np.searchsorted(node_count[:, 0], linknodes) - nodedegree = node_count[indices, 1] - - # Makes sure that beginning and end are end nodes for a link - nodedegree[0] = 2 - nodedegree[-1] = 2 - - intersections = np.where(nodedegree > 1)[0] - segments = intersections.shape[0] - 1 - - # Attributes that are common to all individual links/segments - vars["direction"] = (linktags.get("oneway") == "yes") * 1 - - for k, v in owf.items(): - vars[k] = linktags.get(v) - - for k, v in twf.items(): - val = linktags.get(v["osm_source"]) - if vars["direction"] == 0: - for d1, d2 in [("ab", "forward"), ("ba", "backward")]: - vars[f"{k}_{d1}"] = self.__get_link_property(d2, val, linktags, v) - elif vars["direction"] == -1: - vars[f"{k}_ba"] = linktags.get(f"{v['osm_source']}:{'backward'}", val) - elif vars["direction"] == 1: - vars[f"{k}_ab"] = linktags.get(f"{v['osm_source']}:{'forward'}", val) - - vars["modes"] = mode_codes.get(linktags.get("highway"), not_found_tags) - - vars["link_type"] = self.__link_type_quick_reference.get( - vars["link_type"].lower(), self.__repair_link_type(vars["link_type"]) - ) + if counter % message_step == 0: + self.logger.info(f"Creating segments from {counter:,} out of {shape_ :,} OSM link objects") + + # How can I link have less than two points? + if not isinstance(link["nodes"], list): + self.logger.debug(f"OSM link/feature {idx} does not have a list of nodes.") + continue + + if len(link["nodes"]) < 2: + self.logger.debug(f"Link {idx} has less than two nodes. 
{link.nodes}") + continue + + # The link is a straight line between two points + # Or all midpoints are only part of a single link + node_indices = node_count.loc[link["nodes"], "counter"].to_numpy() + if len(link["nodes"]) == 2 or node_indices[1:-1].max() == 1: + # The link has no intersections + geometries.append([idx, self._build_geometry(link.nodes)]) + else: + # Make sure we get the first and last nodes, as they are certainly the extremities of the sublinks + node_indices[0] = 2 + node_indices[-1] = 2 + # The link has intersections + # We build repeated records for links when they have intersections + # This is because it is faster to do this way and then have all the data repeated + # when doing the join with the link fields below + intersecs = np.where(node_indices > 1)[0] + for i, j in zip(intersecs[:-1], intersecs[1:]): + geometries.append([idx, self._build_geometry(link.nodes[i : j + 1])]) + + # Builds the link Geo dataframe + self.links_df.drop(columns=["nodes"], inplace=True) + # We build a dataframe with the geometries created above + # and join with the database + geo_df = pd.DataFrame(geometries, columns=["link_id", "geometry"]).set_index("link_id") + self.links_df = self.links_df.join(geo_df, how="inner") + + self.links_df.loc[:, "link_id"] = np.arange(self.links_df.shape[0]) + 1 + + self.node_df = self.node_df.reset_index() + + # Saves the data to disk in case of issues loading it to the database + osm_data_path = Path(self.project.project_base_path) / "osm_data" + osm_data_path.mkdir(exist_ok=True) + self.links_df.to_parquet(osm_data_path / "links.parquet") + self.node_df.to_parquet(osm_data_path / "nodes.parquet") + + self.logger.info("Adding nodes to file") + self.__emit_all(["text", "Adding nodes to file"]) + + # Removing the triggers before adding all nodes makes things a LOT faster + remove_triggers(conn, self.logger, "network") + + cols = ["node_id", "osm_id", "is_centroid", "modes", "link_types", "lon", "lat"] + insert_qry = f"INSERT INTO nodes ({','.join(cols[:-2])}, geometry) VALUES(?,?,?,?,?, MakePoint(?,?, 4326))" + conn.executemany(insert_qry, self.node_df[cols].to_records(index=False)) - if len(vars["modes"]) > 0: - for i in range(segments): - attributes = self.__build_link_data(vars, intersections, i, linknodes, node_ids, fields) - if attributes is None: - continue - all_attrs.append(attributes) - vars["link_id"] += 1 - - self.__emit_all(["text", f"{counter:,} of {L:,} super links added"]) - self.links[osm_id] = [] - sql = self.insert_qry.format(table, field_names, ",".join(["?"] * (len(all_attrs[0]) - 1))) - self.logger.info("Adding network links") - self.__emit_all(["text", "Adding network links"]) - try: - conn.executemany(sql, all_attrs) - except Exception as e: - self.logger.error("error when inserting link {}. 
Error {}".format(all_attrs[0], e.args)) - self.logger.error(sql) - raise e - - def __worksetup(self): - self.__link_types = self.project.network.link_types - lts = self.__link_types.all_types() - for lt_id, lt in lts.items(): - self.__model_link_types.append(lt.link_type) - self.__model_link_type_ids.append(lt_id) + del self.node_df + gc.collect() - def __update_table_structure(self, conn): - structure = conn.execute("pragma table_info(Links)").fetchall() - has_fields = [x[1].lower() for x in structure] - fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"] - for field in [f for f in fields if f not in has_fields]: - ltype = self.get_link_field_type(field).upper() - conn.execute(f"Alter table Links add column {field} {ltype}") + # But we need to add them back to add the links + add_triggers(conn, self.logger, "network") + + # self.links_df.to_file(self.project.path_to_file, driver="SQLite", spatialite=True, layer="links", mode="a") + + # I could not get the above line to work, so I used the following code instead + self.links_df.index.name = "osm_id" + self.links_df.reset_index(inplace=True) + insert_qry = "INSERT INTO links ({},a_node, b_node, distance, geometry) VALUES({},0,0,0, GeomFromText(?, 4326))" + cols_no_geo = self.links_df.columns.tolist() + cols_no_geo.remove("geometry") + insert_qry = insert_qry.format(", ".join(cols_no_geo), ", ".join(["?"] * len(cols_no_geo))) + + cols = cols_no_geo + ["geometry"] + links_df = self.links_df[cols].to_records(index=False) + + del self.links_df + gc.collect() + self.logger.info("Adding links to file") + self.__emit_all(["text", "Adding links to file"]) + conn.executemany(insert_qry, links_df) + + def _build_geometry(self, nodes: List[int]) -> str: + slice = self.node_df.loc[nodes, :] + txt = ",".join((slice.lon.astype(str) + " " + slice.lat.astype(str)).tolist()) + return f"LINESTRING({txt})" + + def __do_clean(self, conn): + if not self.clean: + conn.execute("VACUUM;") + return + self.logger.info("Cleaning up the network down to the selected area") + links = gpd.GeoDataFrame.from_postgis("SELECT link_id, asBinary(geometry) AS geom FROM links", conn, crs=4326) + existing_link_ids = gpd.sjoin(links, self.model_area, how="left").dropna().link_id.to_numpy() + to_delete = [[x] for x in links[~links.link_id.isin(existing_link_ids)].link_id] + conn.executemany("DELETE FROM links WHERE link_id = ?", to_delete) conn.commit() + conn.execute("VACUUM;") + + def __process_link_chunk(self): + self.logger.info("Processing link modes, types and fields") + self.__emit_all(["text", "Processing link modes, types and fields"]) + + # It is hard to define an optimal chunk_size, so let's assume that 1GB is a good size per chunk + # And let's also assume that each row will be 200 fields at 8 bytes each + # This makes 2Gb roughly equal to 2.6 million rows, so 2 million would so. 
+ chunk_size = 1_000_000 + list_dfs = [self.links_df.iloc[i : i + chunk_size] for i in range(0, self.links_df.shape[0], chunk_size)] + self.links_df = pd.DataFrame([]) + # Initialize link types + with read_and_close(self.project.path_to_file) as conn: + self.__all_ltp = pd.read_sql('SELECT link_type_id, link_type, "" as highway from link_types', conn) + self.__emit_all(["maxValue", len(list_dfs)]) + for i, df in enumerate(list_dfs): + self.logger.info(f"Processing chunk {i + 1}/{len(list_dfs)}") + self.__emit_all(["Value", i]) + if "tags" in df.columns: + # It is critical to reset the index for the concat below to work + df.reset_index(drop=True, inplace=True) + df = pd.concat([df, json_normalize(df["tags"])], axis=1).drop(columns=["tags"]) + df.columns = [x.replace(":", "_") for x in df.columns] + df = self.__build_link_types(df) + df = self.__establish_modes_for_all_links(conn, df) + df = self.__process_link_attributes(df) + else: + self.logger.error("OSM link data does not have tags. Skipping an entire data chunk") + df = pd.DataFrame([]) + list_dfs[i] = df + self.links_df = pd.concat(list_dfs, ignore_index=True) + + def __build_link_types(self, df): + data = [] + df = df.fillna(value={"highway": "missing"}) + df.highway = df.highway.str.lower() + for i, lt in enumerate(df.highway.unique()): + if str(lt) in self.__all_ltp.highway.values: + continue + data.append([*self.__define_link_type(str(lt)), str(lt)]) + self.__all_ltp = pd.concat( + [self.__all_ltp, pd.DataFrame(data, columns=["link_type_id", "link_type", "highway"])] + ) + self.__all_ltp.drop_duplicates(inplace=True) + df = df.merge(self.__all_ltp[["link_type", "highway"]], on="highway", how="left") + return df.drop(columns=["highway"]) - def __build_link_data(self, vars, intersections, i, linknodes, node_ids, fields): - ii = intersections[i] - jj = intersections[i + 1] - all_nodes = [linknodes[x] for x in range(ii, jj + 1)] - - vars["a_node"] = node_ids.get(linknodes[ii], self.node_start) - if vars["a_node"] == self.node_start: - node_ids[linknodes[ii]] = vars["a_node"] - self.node_start += 1 - - vars["b_node"] = node_ids.get(linknodes[jj], self.node_start) - if vars["b_node"] == self.node_start: - node_ids[linknodes[jj]] = vars["b_node"] - self.node_start += 1 - - vars["distance"] = sum( - [ - haversine(self.nodes[x]["lon"], self.nodes[x]["lat"], self.nodes[y]["lon"], self.nodes[y]["lat"]) - for x, y in zip(all_nodes[1:], all_nodes[:-1]) - ] - ) - - geometry = ["{} {}".format(self.nodes[x]["lon"], self.nodes[x]["lat"]) for x in all_nodes] - inside_area = sum([self.nodes[x]["inside_model"] for x in all_nodes]) - if inside_area == 0: - return None - geometry = "LINESTRING ({})".format(", ".join(geometry)) - - attributes = [vars.get(x) for x in fields] - attributes.append(geometry) - return attributes - - def __repair_link_type(self, link_type: str) -> str: + def __define_link_type(self, link_type: str) -> Tuple[str, str]: + proj_link_types = self.project.network.link_types original_link_type = link_type link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() @@ -266,53 +242,110 @@ def __repair_link_type(self, link_type: str) -> str: if piece in ["link", "segment", "stretch"]: link_type = "_".join(split[0 : i + 1]) + if self.__all_ltp.shape[0] >= 51: + link_type = "aggregate_link_type" + if len(link_type) == 0: link_type = "empty" - if len(self.__model_link_type_ids) >= 51 and link_type not in self.__model_link_types: - link_type = "aggregate_link_type" - - if link_type in self.__model_link_types: - 
lt = self.__link_types.get_by_name(link_type) + if link_type in self.__all_ltp.link_type.values: + lt = proj_link_types.get_by_name(link_type) if original_link_type not in lt.description: lt.description += f", {original_link_type}" lt.save() - self.__link_type_quick_reference[original_link_type.lower()] = link_type - return link_type + return [lt.link_type_id, link_type] letter = link_type[0] - if letter in self.__model_link_type_ids: + if letter in self.__all_ltp.link_type_id.values: letter = letter.upper() - if letter in self.__model_link_type_ids: + if letter in self.__all_ltp.link_type_id.values: for letter in string.ascii_letters: - if letter not in self.__model_link_type_ids: + if letter not in self.__all_ltp.link_type_id.values: break - lt = self.__link_types.new(letter) + lt = proj_link_types.new(letter) lt.link_type = link_type lt.description = f"Link types from Open Street Maps: {original_link_type}" lt.save() - self.__model_link_types.append(link_type) - self.__model_link_type_ids.append(letter) - self.__link_type_quick_reference[original_link_type.lower()] = link_type - return link_type + return [letter, link_type] - def __get_link_property(self, d2, val, linktags, v): - vald = linktags.get(f'{v["osm_source"]}:{d2}', val) - if vald is None: - return vald + def __establish_modes_for_all_links(self, conn, df: pd.DataFrame) -> pd.DataFrame: + p = Parameters() + modes = p.parameters["network"]["osm"]["modes"] - if vald.isdigit(): - if vald == val and v["osm_behaviour"] == "divide": - vald = float(val) / 2 - return vald + mode_codes = conn.execute("SELECT mode_name, mode_id from modes").fetchall() + mode_codes = {p[0]: p[1] for p in mode_codes} - @staticmethod - def unique_count(a): - # From: https://stackoverflow.com/a/21124789/1480643 - unique, inverse = np.unique(a, return_inverse=True) - count = np.zeros(len(unique), int) - np.add.at(count, inverse, 1) - return np.vstack((unique, count)).T + type_list = {} + notfound = "" + for mode, val in modes.items(): + all_types = val["link_types"] + md = mode_codes[mode] + for tp in all_types: + type_list[tp] = "".join(sorted("{}{}".format(type_list.get(tp, ""), md))) + if val["unknown_tags"]: + notfound += md + + type_list = {k: "".join(set(v)) for k, v in type_list.items()} + + df_aux = pd.DataFrame([[k, v] for k, v in type_list.items()], columns=["link_type", "modes"]) + df = df.merge(df_aux, on="link_type", how="left").fillna(value={"modes": "".join(sorted(notfound))}) + return df + + def __process_link_attributes(self, df: pd.DataFrame) -> pd.DataFrame: + df = df.assign(direction=0, link_id=0) + if "oneway" in df.columns: + df.loc[df.oneway == "yes", "direction"] = 1 + df.loc[df.oneway == "backward", "direction"] = -1 + p = Parameters() + fields = p.parameters["network"]["links"]["fields"] + + for x in fields["one-way"]: + if "link_type" in x.keys(): + continue + keys_ = list(x.values())[0] + field = list(x.keys())[0] + osm_name = keys_.get("osm_source", field).replace(":", "_") + df.rename(columns={osm_name: field}, inplace=True, errors="ignore") + + for x in fields["two-way"]: + keys_ = list(x.values())[0] + field = list(x.keys())[0] + if "osm_source" not in keys_: + continue + osm_name = keys_.get("osm_source", field).replace(":", "_") + if osm_name not in df.columns: + continue + df[f"{field}_ba"] = df[osm_name].copy() + df.rename(columns={osm_name: f"{field}_ab"}, inplace=True, errors="ignore") + if "osm_behaviour" in keys_ and keys_["osm_behaviour"] == "divide": + df[f"{field}_ab"] = pd.to_numeric(df[f"{field}_ab"], 
errors="coerce") + df[f"{field}_ba"] = pd.to_numeric(df[f"{field}_ba"], errors="coerce") + + # Divides the values by 2 or zero them depending on the link direction + df.loc[df.direction == 0, f"{field}_ab"] /= 2 + df.loc[df.direction == -1, f"{field}_ab"] = 0 + + df.loc[df.direction == 0, f"{field}_ba"] /= 2 + df.loc[df.direction == 1, f"{field}_ba"] = 0 + + if f"{field}_forward" in df: + fld = pd.to_numeric(df[f"{field}_forward"], errors="coerce") + df.loc[fld > 0, f"{field}_ab"] = fld[fld > 0] + if f"{field}_backward" in df: + fld = pd.to_numeric(df[f"{field}_backward"], errors="coerce") + df.loc[fld > 0, f"{field}_ba"] = fld[fld > 0] + cols = list_columns(self.project.conn, "links") + ["nodes"] + return df[[x for x in cols if x in df.columns]] + + ######## TABLE STRUCTURE UPDATING ######## + def __update_table_structure(self, conn): + structure = conn.execute("pragma table_info(Links)").fetchall() + has_fields = [x[1].lower() for x in structure] + fields = [field.lower() for field in self.get_link_fields()] + ["osm_id"] + for field in [f for f in fields if f not in has_fields]: + ltype = self.get_link_field_type(field).upper() + conn.execute(f"Alter table Links add column {field} {ltype}") + conn.commit() @staticmethod def get_link_fields(): @@ -339,51 +372,3 @@ def get_link_field_type(field_name): for tp in fields["one-way"]: if field_name in tp: return tp[field_name]["type"] - - @staticmethod - def field_osm_source(): - p = Parameters() - fields = p.parameters["network"]["links"]["fields"] - - owf = { - list(x.keys())[0]: x[list(x.keys())[0]]["osm_source"] - for x in fields["one-way"] - if "osm_source" in x[list(x.keys())[0]] - } - - twf = {} - for x in fields["two-way"]: - if "osm_source" in x[list(x.keys())[0]]: - twf[list(x.keys())[0]] = { - "osm_source": x[list(x.keys())[0]]["osm_source"], - "osm_behaviour": x[list(x.keys())[0]]["osm_behaviour"], - } - return owf, twf - - def modes_per_link_type(self, conn): - p = Parameters() - modes = p.parameters["network"]["osm"]["modes"] - - mode_codes = conn.execute("SELECT mode_name, mode_id from modes").fetchall() - mode_codes = {p[0]: p[1] for p in mode_codes} - - type_list = {} - notfound = "" - for mode, val in modes.items(): - all_types = val["link_types"] - md = mode_codes[mode] - for tp in all_types: - type_list[tp] = "{}{}".format(type_list.get(tp, ""), md) - if val["unknown_tags"]: - notfound += md - - type_list = {k: "".join(set(v)) for k, v in type_list.items()} - - return type_list, "{}".format(notfound) - - @staticmethod - def get_node_fields(): - p = Parameters() - fields = p.parameters["network"]["nodes"]["fields"] - fields = [list(x.keys())[0] for x in fields] - return fields + ["osm_id"] diff --git a/aequilibrae/project/network/osm/osm_downloader.py b/aequilibrae/project/network/osm/osm_downloader.py index 76c810b1e..58b3fd6c9 100644 --- a/aequilibrae/project/network/osm/osm_downloader.py +++ b/aequilibrae/project/network/osm/osm_downloader.py @@ -10,20 +10,21 @@ For the original work, please see https://github.com/gboeing/osmnx """ +import gc +import importlib.util as iutil import logging -import time import re -from typing import List +import time +from typing import List, Dict +import pandas as pd import requests from shapely import Polygon -from .osm_params import http_headers, memory -from aequilibrae.parameters import Parameters from aequilibrae.context import get_logger +from aequilibrae.parameters import Parameters from aequilibrae.utils import WorkerThread -import gc -import importlib.util as iutil +from 
.osm_params import http_headers, memory spec = iutil.find_spec("PyQt5") pyqt = spec is not None @@ -50,6 +51,9 @@ def __init__(self, polygons: List[Polygon], modes, logger: logging.Logger = None self.overpass_endpoint = par["overpass_endpoint"] self.timeout = par["timeout"] self.sleeptime = par["sleeptime"] + self._nodes = [] + self._links = [] + self.data: Dict[str, pd.DataFrame] = {"nodes": pd.DataFrame([]), "links": pd.DataFrame([])} def doWork(self): infrastructure = 'way["highway"]' @@ -64,7 +68,7 @@ def doWork(self): m = f"[maxsize: {memory}]" for counter, poly in enumerate(self.polygons): msg = f"Downloading polygon {counter + 1} of {len(self.polygons)}" - self.logger.debug(msg) + self.logger.info(msg) self.__emit_all(["Value", counter]) self.__emit_all(["text", msg]) west, south, east, north = poly.bounds @@ -80,10 +84,26 @@ def doWork(self): ) json = self.overpass_request(data={"data": query_str}, timeout=self.timeout) if json["elements"]: - self.json.extend(json["elements"]) - del json - gc.collect() + for tag, lst in [("node", self._nodes), ("way", self._links)]: + df = pd.DataFrame([item for item in json["elements"] if item["type"] == tag]) + if tag == "node": + df = df.assign(is_centroid=0, modes="", link_types="", node_id=0) + lst.append(df) + del json + gc.collect() + self.__emit_all(["Value", len(self.polygons)]) + self.__emit_all(["text", "Downloading finished. Processing data"]) + for lst, table in [(self._links, "links"), (self._nodes, "nodes")]: + df = pd.DataFrame([]) + if len(lst) > 0: + df = pd.concat(lst, ignore_index=True).drop_duplicates(subset=["id"]).drop(columns=["type"]) + if table != "links": + df = df.drop(columns=["tags"], errors="ignore") + self.data[table] = df.rename(columns={"id": "osm_id"}, errors="ignore") + lst.clear() + gc.collect() + self.__emit_all(["FinishedDownloading", 0]) def overpass_request(self, data, pause_duration=None, timeout=180, error_pause_duration=None): diff --git a/aequilibrae/transit/map_matching_graph.py b/aequilibrae/transit/map_matching_graph.py index a1ab70418..56e5610d8 100644 --- a/aequilibrae/transit/map_matching_graph.py +++ b/aequilibrae/transit/map_matching_graph.py @@ -118,8 +118,7 @@ def __build_graph_from_cache(self): def __build_graph_from_scratch(self): self.logger.info(f"Creating map-matching graph from scratch for mode_id={self.mode_id}") - self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=np.nan) - self.df.loc[:, "geo"] = self.df.wkt.apply(shapely.wkt.loads) + self.df = self.df.assign(original_id=self.df.link_id, is_connector=0, geo=self.df.wkt.apply(shapely.wkt.loads)) self.df.loc[self.df.link_id < 0, "link_id"] = self.df.link_id * -1 + self.df.link_id.max() + 1 # We make sure all link IDs are in proper order diff --git a/aequilibrae/utils/db_utils.py b/aequilibrae/utils/db_utils.py index 23534138f..83efb49a2 100644 --- a/aequilibrae/utils/db_utils.py +++ b/aequilibrae/utils/db_utils.py @@ -79,6 +79,10 @@ def get_schema(conn, table_name): return {e.name: e for e in rv} +def list_columns(conn, table_name): + return list(get_schema(conn, table_name).keys()) + + def has_column(conn, table_name, col_name): return col_name in get_schema(conn, table_name) diff --git a/docs/source/_static/switcher.json b/docs/source/_static/switcher.json index 38c10b201..859aa1afc 100644 --- a/docs/source/_static/switcher.json +++ b/docs/source/_static/switcher.json @@ -4,6 +4,11 @@ "version": "develop", "url": "https://aequilibrae.com/python/develop/" }, + { + "name": "1.0.1", + "version": "1.0.1", + "url": 
"https://aequilibrae.com/python/V.1.0.1/" + }, { "name": "1.0.0", "version": "1.0.0", diff --git a/pyproject.toml b/pyproject.toml index ff2ab1ff1..8fab0fcf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ extend-exclude = '''docs/*''' [build-system] -requires = ["setuptools", "numpy", "cython", "pyaml", "pyqt5", "requests", "scipy", "shapely", "pandas", "pyarrow", "pyproj", "wheel"] +requires = ["setuptools", "numpy", "cython", "pandas", "pyarrow==15.0.0", "wheel"] [tool.ruff] @@ -55,3 +55,49 @@ target-version = "py39" [tool.ruff.mccabe] max-complexity = 20 + +[tool.cibuildwheel] +# While we test this facility we we will only build for 3.10 +#build = ["cp310-*"] +build = ["cp39-*","cp310-*", "cp311-*", "cp312-*"] + +# We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows +skip = ["cp36-*", "cp37-*", "cp38-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +test-skip = "" + +archs = ["auto"] +build-frontend = "default" +config-settings = {} +dependency-versions = "pinned" +environment = {} +environment-pass = [] +build-verbosity = 0 + +before-all = "" +before-build = "" +repair-wheel-command = "" + +test-command = "" +before-test = "" +test-requires = [] +test-extras = [] + +container-engine = "docker" + +manylinux-x86_64-image = "manylinux_2_28" +manylinux-aarch64-image = "manylinux_2_28" +manylinux-pypy_x86_64-image = "manylinux_2_28" +manylinux-pypy_aarch64-image = "manylinux_2_28" + +[tool.cibuildwheel.linux] +archs = ["auto", "aarch64"] +build = ["cp310-*", "cp311-*", "cp312-*"] +repair-wheel-command = [ + "auditwheel repair -w {dest_dir} {wheel} --exclude libarrow.so.1500 --exclude libarrow_python.so" +] + +[tool.cibuildwheel.macos] +#archs = ["x86_64", "universal2", "arm64"] +archs = ["x86_64"] +environment = { CC="gcc-12", CXX="g++-12" } +repair-wheel-command = "delocate-wheel -vv --ignore-missing-dependencies --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" diff --git a/requirements.txt b/requirements.txt index d22ed0b9d..876332710 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,6 @@ shapely pandas pyproj rtree -openmatrix \ No newline at end of file +openmatrix +pyarrow==15.0.0 +geopandas diff --git a/requirements_additional.txt b/requirements_additional.txt index 96f666a5e..2d07ca02a 100644 --- a/requirements_additional.txt +++ b/requirements_additional.txt @@ -1,2 +1 @@ -pyarrow -PyQt5 \ No newline at end of file +PyQt5 diff --git a/setup.py b/setup.py index 852b82dff..b68b463a6 100644 --- a/setup.py +++ b/setup.py @@ -12,10 +12,15 @@ exec(f.read()) include_dirs = [np.get_include()] +libraries = [] +library_dirs = [] if iutil.find_spec("pyarrow") is not None: import pyarrow as pa + pa.create_library_symlinks() include_dirs.append(pa.get_include()) + libraries.extend(pa.get_libraries()) + library_dirs.extend(pa.get_library_dirs()) is_win = "WINDOWS" in platform.platform().upper() is_mac = any(e in platform.platform().upper() for e in ["MACOS", "DARWIN"]) @@ -62,8 +67,11 @@ extra_link_args=link_args, define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], include_dirs=include_dirs, + libraries=libraries, + library_dirs=library_dirs, language="c++", ) + ext_mod_graph_building = Extension( "aequilibrae.paths.graph_building", [join("aequilibrae", "paths", "graph_building.pyx")], diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 0c2a9447c..b68f64d97 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ 
b/tests/aequilibrae/paths/test_route_choice.py @@ -6,6 +6,7 @@ from unittest import TestCase import pandas as pd import numpy as np +import pyarrow as pa from aequilibrae import Graph, Project from aequilibrae.paths.route_choice import RouteChoiceSet @@ -36,56 +37,62 @@ def test_route_choice(self): rc = RouteChoiceSet(self.graph) a, b = 1, 20 - results = rc.run(a, b, max_routes=10, max_depth=0) - self.assertEqual(len(results), 10, "Returned more routes than expected") - self.assertEqual(len(results), len(set(results)), "Returned duplicate routes") - - # With a depth 1 only one path will be found - results = rc.run(a, b, max_routes=0, max_depth=1) - self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") - self.assertListEqual(results, [(58, 52, 29, 24, 12, 8, 5, 1)], "Initial route isn't the shortest A* route") + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: + with self.subTest(**kwargs): + results = rc.run(a, b, max_routes=10, **kwargs) + self.assertEqual(len(results), 10, "Returned more routes than expected") + self.assertEqual(len(results), len(set(results)), "Returned duplicate routes") + + # With a depth 1 only one path will be found + results = rc.run(a, b, max_routes=0, max_depth=1) + self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") + self.assertListEqual( + results, [(1, 5, 8, 12, 24, 29, 52, 58)], "Initial route isn't the shortest A* route" + ) - # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths - results2 = rc.run(a, b, max_routes=0, max_depth=2) - self.assertEqual( - len(results2), 1 + len(results[0]) - 4, "Depth of 2 didn't yield the expected number of routes" - ) - self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") + # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths + results2 = rc.run(a, b, max_routes=0, max_depth=2, **kwargs) + self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") self.assertListEqual( - rc.run(a, b, max_routes=0, max_depth=2, seed=0), - rc.run(a, b, max_routes=0, max_depth=2, seed=10), + rc.run(a, b, max_routes=0, seed=0, max_depth=2), + rc.run(a, b, max_routes=0, seed=10, max_depth=2), "Seeded and unseeded results differ with unlimited `max_routes` (queue is incorrectly being shuffled)", ) self.assertNotEqual( - rc.run(a, b, max_routes=3, max_depth=2, seed=0), - rc.run(a, b, max_routes=3, max_depth=2, seed=10), + rc.run(a, b, max_routes=3, seed=0, max_depth=2), + rc.run(a, b, max_routes=3, seed=10, max_depth=2), "Seeded and unseeded results don't differ with limited `max_routes` (queue is not being shuffled)", ) def test_route_choice_empty_path(self): - rc = RouteChoiceSet(self.graph) - a = 1 - - self.assertEqual( - rc.batched([(a, a)], max_routes=0, max_depth=3), {(a, a): []}, "Route set from self to self should be empty" - ) + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: + with self.subTest(**kwargs): + rc = RouteChoiceSet(self.graph) + a = 1 + + self.assertFalse( + rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs), + "Route set from self to self should be empty", + ) def test_route_choice_blocking_centroids(self): - a, b = 1, 20 + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}]: + with self.subTest(**kwargs): + a, b = 1, 20 - self.graph.set_blocked_centroid_flows(False) - rc = RouteChoiceSet(self.graph) + 
self.graph.set_blocked_centroid_flows(False) + rc = RouteChoiceSet(self.graph) - results = rc.run(a, b, max_routes=2, max_depth=2) - self.assertNotEqual(results, [], "Unblocked centroid flow found no paths") + results = rc.run(a, b, max_routes=2, max_depth=2, **kwargs) + self.assertNotEqual(results, [], "Unblocked centroid flow found no paths") - self.graph.set_blocked_centroid_flows(True) - rc = RouteChoiceSet(self.graph) + self.graph.set_blocked_centroid_flows(True) + rc = RouteChoiceSet(self.graph) - results = rc.run(a, b, max_routes=2, max_depth=2) - self.assertListEqual(results, [], "Blocked centroid flow found a path") + results = rc.run(a, b, max_routes=2, max_depth=2, **kwargs) + self.assertListEqual(results, [], "Blocked centroid flow found a path") def test_route_choice_batched(self): np.random.seed(0) @@ -93,13 +100,28 @@ def test_route_choice_batched(self): nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 - results = rc.batched(nodes, max_routes=max_routes, max_depth=10, cores=1) + results = rc.batched(nodes, max_routes=max_routes, max_depth=10) - self.assertEqual(len(results), len(nodes), "Requested number of route sets not returned") + gb = results.to_pandas().groupby(by="origin id") + self.assertEqual(len(gb), len(nodes), "Requested number of route sets not returned") - for od, route_set in results.items(): - self.assertEqual(len(route_set), len(set(route_set)), f"Duplicate routes returned for {od}") - self.assertEqual(len(route_set), max_routes, f"Requested number of routes not returned for {od}") + for _, row in gb: + self.assertFalse(any(row["route set"].duplicated()), f"Duplicate routes returned for {row['origin id']}") + self.assertEqual( + len(row["route set"]), max_routes, f"Requested number of routes not returned for {row['origin id']}" + ) + + def test_route_choice_duplicates_batched(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [(1, 20)] * 5 + + max_routes = 20 + with self.assertWarns(UserWarning): + results = rc.batched(nodes, max_routes=max_routes, max_depth=10) + + gb = results.to_pandas().groupby(by="origin id") + self.assertEqual(len(gb), 1, "Duplicates not dropped") def test_route_choice_exceptions(self): rc = RouteChoiceSet(self.graph) @@ -116,6 +138,67 @@ def test_route_choice_exceptions(self): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) + with self.assertRaises(ValueError): + rc.run(1, 1, max_routes=1, max_depth=1, bfsle=True, penalty=1.5) + rc.run(1, 1, max_routes=1, max_depth=1, bfsle=False, penalty=0.1) + + def test_round_trip(self): + np.random.seed(1000) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + + max_routes = 20 + + path = join(self.project.project_base_path, "batched results") + table = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path) + + dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) + new_table = ( + dataset.to_table() + .to_pandas() + .sort_values(by=["origin id", "destination id"])[["origin id", "destination id", "route set"]] + .reset_index(drop=True) + ) + + table = table.to_pandas().sort_values(by=["origin id", "destination id"]).reset_index(drop=True) + + pd.testing.assert_frame_equal(table, new_table) + + def test_cost_results(self): + np.random.seed(0) + rc = 
RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = table.to_pandas() + + gb = table.groupby(by=["origin id", "destination id"]) + for od, df in gb: + for route, cost in zip(df["route set"].values, df["cost"].values): + np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + + def test_gamma_results(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = table.to_pandas() + + gb = table.groupby(by=["origin id", "destination id"]) + for od, df in gb: + self.assertTrue(all((df["gamma"] > 0) & (df["gamma"] <= 1))) + + def test_prob_results(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = table.to_pandas() + + gb = table.groupby(by=["origin id", "destination id"]) + for od, df in gb: + self.assertAlmostEqual(1.0, sum(df["probability"].values), msg="Probability not close to 1.0") + def generate_line_strings(project, graph, results): """Debug method""" diff --git a/tests/aequilibrae/project/data/porto_rico.parquet b/tests/aequilibrae/project/data/porto_rico.parquet new file mode 100644 index 000000000..97dfe198c Binary files /dev/null and b/tests/aequilibrae/project/data/porto_rico.parquet differ diff --git a/tests/aequilibrae/project/data/wynnum.parquet b/tests/aequilibrae/project/data/wynnum.parquet new file mode 100644 index 000000000..9c9eeb46c Binary files /dev/null and b/tests/aequilibrae/project/data/wynnum.parquet differ diff --git a/tests/aequilibrae/project/test_osm_downloader.py b/tests/aequilibrae/project/test_osm_downloader.py index 706c04989..7c5fe3bb7 100644 --- a/tests/aequilibrae/project/test_osm_downloader.py +++ b/tests/aequilibrae/project/test_osm_downloader.py @@ -32,7 +32,7 @@ def test_do_work2(self): o = OSMDownloader([box(-112.185, 36.59, -112.179, 36.60)], ["car"]) o.doWork() - if "elements" not in o.json[0]: + if len(o.json) == 0 or "elements" not in o.json[0]: return if len(o.json[0]["elements"]) > 1000: diff --git a/tests/aequilibrae/project/test_polygon_gridding.py b/tests/aequilibrae/project/test_polygon_gridding.py new file mode 100644 index 000000000..21dd40562 --- /dev/null +++ b/tests/aequilibrae/project/test_polygon_gridding.py @@ -0,0 +1,22 @@ +from pathlib import Path + +import geopandas as gpd +import pytest + +from aequilibrae.project.network.osm.model_area_gridding import geometry_grid + + +def test_geometry_grid(): + pth = Path(__file__).parent / "data" + + # Small simple polygon + polygon = gpd.read_parquet(pth / "wynnum.parquet").geometry[0] + grid = geometry_grid(polygon, 4326) + assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001) + assert grid.shape[0] == 1 + + # Bigger polygon + polygon = gpd.read_parquet(pth / "porto_rico.parquet").geometry[0] + grid = geometry_grid(polygon, 4326) + assert grid.geometry.area.sum() == pytest.approx(polygon.area, 0.000000001) + assert grid.shape[0] == 16
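The new test above exercises geometry_grid from the new model_area_gridding module, whose implementation does not appear in this part of the diff. Purely as an illustration of the behaviour being tested (the real function's subdivision rule, signature and cell counts may well differ), a helper of this kind can be sketched by cutting the polygon's bounding box into cells and intersecting each cell with the polygon, which is why the test expects the cell areas to sum to the area of the original polygon:

# Rough sketch only; NOT the aequilibrae implementation. The fixed 4x4 subdivision is an assumption.
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon, box


def grid_sketch(polygon: Polygon, srid: int, cells_per_side: int = 4) -> gpd.GeoDataFrame:
    minx, miny, maxx, maxy = polygon.bounds
    xs = np.linspace(minx, maxx, cells_per_side + 1)
    ys = np.linspace(miny, maxy, cells_per_side + 1)
    cells = []
    for i in range(cells_per_side):
        for j in range(cells_per_side):
            piece = box(xs[i], ys[j], xs[i + 1], ys[j + 1]).intersection(polygon)
            if not piece.is_empty:
                cells.append(piece)
    # The pieces tile the polygon exactly, so their areas sum to the polygon's area
    return gpd.GeoDataFrame(geometry=cells, crs=srid)


square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
grid = grid_sketch(square, 4326)
assert abs(grid.geometry.area.sum() - square.area) < 1e-9

Presumably the point of gridding the model area is to keep the spatial join in __do_clean cheap, since each link is then tested against small cells instead of one large polygon.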