diff --git a/.gitignore b/.gitignore index d2631e985..a421a2b63 100644 --- a/.gitignore +++ b/.gitignore @@ -96,3 +96,5 @@ build/ coverage.xml #### End snippet +tests/data/overture/theme=transportation/type=connector/transportation_data_connector.parquet +tests/data/overture/theme=transportation/type=segment/transportation_data_segment.parquet diff --git a/aequilibrae/parameters.yml b/aequilibrae/parameters.yml index d7fe1be3e..de2fc937e 100644 --- a/aequilibrae/parameters.yml +++ b/aequilibrae/parameters.yml @@ -230,6 +230,72 @@ network: mode_filter: pedestrian: 'no' unknown_tags: true + ovm: + all_link_types: + - bridleway + - cycleway + - driveway + - footway + - livingStreet + - motorway + - parkingAisle + - pedestrian + - primary + - residential + - secondary + - steps + - tertiary + - track + - trunk + - unclassified + - unknown + modes: + bicycle: + link_types: + - primary + - secondary + - tertiary + - livingStreet + - parkingAisle + - residential + - cycleway + - pedestrian + - track + - unclassified + unknown_tags: true + car: + link_types: + - motorway + - trunk + - primary + - secondary + - tertiary + - unclassified + - residential + - livingStreet + - parkingAisle + unknown_tags: true + transit: + link_types: + - motorway + - trunk + - primary + - secondary + - tertiary + - unclassified + - residential + - livingStreet + unknown_tags: true + walk: + link_types: + - cycleway + - footway + - steps + - pedestrian + - track + - bridleway + - unclassified + unknown_tags: true gmns: critical_dist: 2 node: diff --git a/aequilibrae/project/network/download_parquet.ipynb b/aequilibrae/project/network/download_parquet.ipynb new file mode 100644 index 000000000..d87a55808 --- /dev/null +++ b/aequilibrae/project/network/download_parquet.ipynb @@ -0,0 +1,1608 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import time\n", + "import re\n", + "from pathlib import Path\n", + "\n", + "import requests\n", + "from aequilibrae.parameters import Parameters\n", + "from aequilibrae.context import get_logger\n", + "import gc\n", + "import importlib.util as iutil\n", + "from aequilibrae.utils import WorkerThread\n", + "\n", + "import duckdb\n", + "import geopandas as gpd\n", + "import subprocess\n", + "import os\n", + "from typing import Union" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def initialise_duckdb_spatial():\n", + " conn = duckdb.connect()\n", + " c = conn.cursor()\n", + "\n", + " c.execute(\n", + " \"\"\"INSTALL spatial; \n", + " INSTALL httpfs;\n", + " INSTALL parquet;\n", + " \"\"\"\n", + " )\n", + " c.execute(\n", + " \"\"\"LOAD spatial;\n", + " LOAD parquet;\n", + " SET s3_region='us-west-2';\n", + " \"\"\"\n", + " )\n", + " return c\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Count
0281
\n", + "
" + ], + "text/plain": [ + " Count\n", + "0 281" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_parquet.zstd.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " COPY(\n", + " SELECT *\n", + " FROM read_parquet('{pth}/*', union_by_name=True)\n", + " WHERE bbox.minx > '{airlie_bbox[0]}'\n", + " AND bbox.maxx < '{airlie_bbox[2]}'\n", + " AND bbox.miny > '{airlie_bbox[1]}'\n", + " AND bbox.maxy < '{airlie_bbox[3]}')\n", + " TO '{pth2}'\n", + " (FORMAT 'parquet', COMPRESSION 'ZSTD')\n", + "\"\"\"\n", + "c = initialise_duckdb_spatial()\n", + "g = c.execute(sql)\n", + "g \n", + "g.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation.zstd.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " COPY(\n", + " SELECT \n", + " ST_GeomFromWKB(geometry) AS geom,\n", + " *\n", + " FROM read_parquet('{pth}/*', union_by_name=True, hive_partitioning=1)\n", + " WHERE bbox.minx > '{airlie_bbox[0]}'\n", + " AND bbox.maxx < '{airlie_bbox[2]}'\n", + " AND bbox.miny > '{airlie_bbox[1]}'\n", + " AND bbox.maxy < '{airlie_bbox[3]}')\n", + " TO '{pth2}'\n", + " WITH (FORMAT 'parquet', CODEC 'ZSTD')\n", + "\"\"\"\n", + "c = initialise_duckdb_spatial()\n", + "g2 = c.execute(sql)\n", + "g2\n", + "g2.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "g2.df()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "Query interrupted", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: ", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[6], line 18\u001b[0m\n\u001b[0;32m 5\u001b[0m sql \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;124m COPY(\u001b[39m\n\u001b[0;32m 7\u001b[0m \u001b[38;5;124m SELECT \u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[38;5;124m WITH (FORMAT \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mparquet\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, CODEC \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mZSTD\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m)\u001b[39m\n\u001b[0;32m 16\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m 17\u001b[0m c \u001b[38;5;241m=\u001b[39m initialise_duckdb_spatial()\n\u001b[1;32m---> 18\u001b[0m g2 \u001b[38;5;241m=\u001b[39m \u001b[43mc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43msql\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 19\u001b[0m g2\n\u001b[0;32m 20\u001b[0m g2\u001b[38;5;241m.\u001b[39mdf()\n", + "\u001b[1;31mRuntimeError\u001b[0m: Query interrupted" + ] + } + 
], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation3.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " COPY(\n", + " SELECT \n", + " *\n", + " FROM read_parquet('{pth}/*', union_by_name=True, hive_partitioning=1)\n", + " WHERE bbox.minx > '{airlie_bbox[0]}'\n", + " AND bbox.maxx < '{airlie_bbox[2]}'\n", + " AND bbox.miny > '{airlie_bbox[1]}'\n", + " AND bbox.maxy < '{airlie_bbox[3]}')\n", + " TO '{pth2}'\n", + " WITH (FORMAT 'parquet', CODEC 'ZSTD')\n", + "\"\"\"\n", + "c = initialise_duckdb_spatial()\n", + "g2 = c.execute(sql)\n", + "g2\n", + "g2.df()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Count
088731
\n", + "
" + ], + "text/plain": [ + " Count\n", + "0 88731" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_parquet.zstd.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " COPY(\n", + " SELECT *\n", + " FROM read_parquet('{pth}', union_by_name=True))\n", + " TO '{pth2}'\n", + " (FORMAT 'parquet', COMPRESSION 'ZSTD')\n", + "\"\"\"\n", + "initialise_duckdb_spatial().execute(sql).df()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geometry
0[0, 0, 0, 0, 2, 0, 0, 0, 2, 64, 97, 27, 247, 1...
\n", + "
" + ], + "text/plain": [ + " geometry\n", + "0 [0, 0, 0, 0, 2, 0, 0, 0, 2, 64, 97, 27, 247, 1..." + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_parquet.zstd.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " \n", + " SELECT geometry\n", + " FROM read_parquet('{pth}', union_by_name=True)\n", + "\"\"\"\n", + "initialise_duckdb_spatial().execute(sql).df()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LINESTRING (136.879572 37.23146, 136.880325 37.231763, 136.880494 37.231853, 136.880677 37.231988, 136.880733 37.232039, 136.880768 37.232095, 136.880809 37.232265, 136.880852 37.232343, 136.880929 37.232417, 136.881007 37.232456, 136.8812762 37.2325315, 136.881387 37.232569, 136.881471 37.232619, 136.881745 37.232873, 136.881992 37.23307, 136.8821814 37.2332027, 136.8822227 37.233222, 136.8828104 37.233415, 136.883195 37.233544, 136.8833539 37.2336068)\n" + ] + } + ], + "source": [ + "from shapely.geometry import LineString\n", + "import shapely.wkb as wkb\n", + "\n", + "# Assuming 'serialized_geometry' is the given serialized format\n", + "serialized_geometry = bytearray(b'\\x00\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x14@a\\x1c%t-\\xcfF@B\\x9d\\xa0{5*\\x84@a\\x1c+\\x9fU\\x9b=@B\\x9d\\xaah\\xf4\\xb6 @a\\x1c-\\x01\\xc0\\xca`@B\\x9d\\xad[\\xee=`@a\\x1c.\\x81\\x88*\\xdc@B\\x9d\\xb1\\xc8d\\x88@@a\\x1c.\\xf6\\xf8\\xf0A@B\\x9d\\xb3t62\\xc2@a\\x1c/@_k\\xa0@B\\x9d\\xb5I\\xf9HV@a\\x1c/\\x96[ \\xb8@B\\x9d\\xba\\xdc\\t\\x80\\xb2@a\\x1c/\\xf0\\x88\\x93\\xb8@B\\x9d\\xbdjY:.@a\\x1c0\\x92\\x03\\xa3#@B\\x9d\\xbf\\xd7\\x1b\\x04h@a\\x1c15\\x97\\x91\\x82@B\\x9d\\xc1\\x1eB\\xe1&@a\\x1c3j%7\\xc8@B\\x9d\\xc3\\x97\\x99\\xe5\\x19@a\\x1c4R\\x82\\x83\\xd3@B\\x9d\\xc4\\xd2,\\x88\\x1e@a\\x1c5\\x02\\xab\\xab\\xeb@B\\x9d\\xc6u\\x9a\\xb6\\xd0@a\\x1c7AJM+@B\\x9d\\xce\\xc8O\\x8f\\x8a@a\\x1c9GIj\\xad@B\\x9d\\xd5<\\xdd\\xd6\\xe0@a\\x1c:\\xd4|\\xc4w@B\\x9d\\xd9\\x96\\x08\\xeba@a\\x1c;+\\x19\\x89>@B\\x9d\\xda7\\xefZ\\x96@a\\x1c?\\xfb\\x98\\x923@B\\x9d\\xe0\\x8a\\xef\\xb2\\xab@a\\x1cC\")\\x1f\\xb4@B\\x9d\\xe4\\xc5\\x11\\x16\\xa9@a\\x1cDoe\\xe9i@B\\x9d\\xe6\\xd3\\xdf\\x0f\\xc5')\n", + "\n", + "# Convert to bytes\n", + "serialized_geometry_bytes = bytes(serialized_geometry)\n", + "\n", + "# Use shapely.wkb.loads to convert the serialized geometry to a LineString\n", + "line_string = wkb.loads(serialized_geometry_bytes)\n", + "\n", + "# Print the LineString\n", + "print(line_string)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_parquet.zstd.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " \n", + " SELECT geometry\n", + " FROM read_parquet('{pth}', union_by_name=True)\n", + "\"\"\"\n", + "g = initialise_duckdb_spatial().execute(sql).df()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgeometrybboxsubTypelocalityTypenamescontextIdadminLevelisoCountryCodeAlpha2isoSubCountryCode...socialsemailsphonesbrandaddressessourceTagswikidatasurfaceisSaltisIntermittent
08a2e712b282ffff-17DFF24660C0674BLINESTRING (136.87391 37.21506, 136.87418 37.2...{'minx': 136.8739104, 'maxx': 136.8741804, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
1882e7174d3fffff-13FF4449D72183C2LINESTRING (136.87957 37.23146, 136.88032 37.2...{'minx': 136.879572, 'maxx': 136.8833539, 'min...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
2892e7174d33ffff-17FE1106542131D2LINESTRING (136.88655 37.22834, 136.88672 37.2...{'minx': 136.886546, 'maxx': 136.886733, 'miny...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
3872e71749ffffff-13CDE03A24646A31LINESTRING (136.86781 37.24933, 136.86757 37.2...{'minx': 136.865495, 'maxx': 136.867811, 'miny...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
4872e7174dffffff-13FFCFC79C7F8D41LINESTRING (136.88335 37.23361, 136.88340 37.2...{'minx': 136.8833539, 'maxx': 136.8888252, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
..................................................................
887268804ebaf6dfffff-157B56F2364053C0LINESTRING (171.81161 69.73536, 171.81055 69.7...{'minx': 171.8036842, 'maxx': 171.8116128, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
887278804ebaf61fffff-159FD531CC11F67BLINESTRING (171.83673 69.73090, 171.83653 69.7...{'minx': 171.8275881, 'maxx': 171.836729, 'min...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
887288704ebaf6ffffff-15BFEADF6048761DLINESTRING (171.82734 69.72528, 171.82774 69.7...{'minx': 171.8273413, 'maxx': 171.8530772, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
88729860db679fffffff-17EDF07478DAA881LINESTRING (173.78101 68.98210, 173.77863 68.9...{'minx': 173.7595158, 'maxx': 173.7810087, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
88730850db6c7fffffff-17ACF6D5C1797FD5LINESTRING (173.54518 69.69184, 173.54693 69.6...{'minx': 173.5451782, 'maxx': 173.6200343, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
\n", + "

88731 rows × 37 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 8a2e712b282ffff-17DFF24660C0674B \n", + "1 882e7174d3fffff-13FF4449D72183C2 \n", + "2 892e7174d33ffff-17FE1106542131D2 \n", + "3 872e71749ffffff-13CDE03A24646A31 \n", + "4 872e7174dffffff-13FFCFC79C7F8D41 \n", + "... ... \n", + "88726 8804ebaf6dfffff-157B56F2364053C0 \n", + "88727 8804ebaf61fffff-159FD531CC11F67B \n", + "88728 8704ebaf6ffffff-15BFEADF6048761D \n", + "88729 860db679fffffff-17EDF07478DAA881 \n", + "88730 850db6c7fffffff-17ACF6D5C1797FD5 \n", + "\n", + " geometry \\\n", + "0 LINESTRING (136.87391 37.21506, 136.87418 37.2... \n", + "1 LINESTRING (136.87957 37.23146, 136.88032 37.2... \n", + "2 LINESTRING (136.88655 37.22834, 136.88672 37.2... \n", + "3 LINESTRING (136.86781 37.24933, 136.86757 37.2... \n", + "4 LINESTRING (136.88335 37.23361, 136.88340 37.2... \n", + "... ... \n", + "88726 LINESTRING (171.81161 69.73536, 171.81055 69.7... \n", + "88727 LINESTRING (171.83673 69.73090, 171.83653 69.7... \n", + "88728 LINESTRING (171.82734 69.72528, 171.82774 69.7... \n", + "88729 LINESTRING (173.78101 68.98210, 173.77863 68.9... \n", + "88730 LINESTRING (173.54518 69.69184, 173.54693 69.6... \n", + "\n", + " bbox subType localityType \\\n", + "0 {'minx': 136.8739104, 'maxx': 136.8741804, 'mi... road None \n", + "1 {'minx': 136.879572, 'maxx': 136.8833539, 'min... road None \n", + "2 {'minx': 136.886546, 'maxx': 136.886733, 'miny... road None \n", + "3 {'minx': 136.865495, 'maxx': 136.867811, 'miny... road None \n", + "4 {'minx': 136.8833539, 'maxx': 136.8888252, 'mi... road None \n", + "... ... ... ... \n", + "88726 {'minx': 171.8036842, 'maxx': 171.8116128, 'mi... road None \n", + "88727 {'minx': 171.8275881, 'maxx': 171.836729, 'min... road None \n", + "88728 {'minx': 171.8273413, 'maxx': 171.8530772, 'mi... road None \n", + "88729 {'minx': 173.7595158, 'maxx': 173.7810087, 'mi... road None \n", + "88730 {'minx': 173.5451782, 'maxx': 173.6200343, 'mi... road None \n", + "\n", + " names contextId adminLevel isoCountryCodeAlpha2 isoSubCountryCode ... \\\n", + "0 None None NaN None None ... \n", + "1 None None NaN None None ... \n", + "2 None None NaN None None ... \n", + "3 None None NaN None None ... \n", + "4 None None NaN None None ... \n", + "... ... ... ... ... ... ... \n", + "88726 None None NaN None None ... \n", + "88727 None None NaN None None ... \n", + "88728 None None NaN None None ... \n", + "88729 None None NaN None None ... \n", + "88730 None None NaN None None ... \n", + "\n", + " socials emails phones brand addresses sourceTags wikidata surface \\\n", + "0 None None None None None None None None \n", + "1 None None None None None None None None \n", + "2 None None None None None None None None \n", + "3 None None None None None None None None \n", + "4 None None None None None None None None \n", + "... ... ... ... ... ... ... ... ... \n", + "88726 None None None None None None None None \n", + "88727 None None None None None None None None \n", + "88728 None None None None None None None None \n", + "88729 None None None None None None None None \n", + "88730 None None None None None None None None \n", + "\n", + " isSalt isIntermittent \n", + "0 None None \n", + "1 None None \n", + "2 None None \n", + "3 None None \n", + "4 None None \n", + "... ... ... 
\n", + "88726 None None \n", + "88727 None None \n", + "88728 None None \n", + "88729 None None \n", + "88730 None None \n", + "\n", + "[88731 rows x 37 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "gdf = gpd.read_parquet(pth)\n", + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 LINESTRING (-176.53736 -43.88630, -176.53686 -...\n", + "Name: geometry, dtype: geometry" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf[: 1][\"geometry\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgeometrybboxsubTypelocalityTypenamescontextIdadminLevelisoCountryCodeAlpha2isoSubCountryCode...socialsemailsphonesbrandaddressessourceTagswikidatasurfaceisSaltisIntermittent
1882e7174d3fffff-13FF4449D72183C2LINESTRING (136.87957 37.23146, 136.88032 37.2...{'minx': 136.879572, 'maxx': 136.8833539, 'min...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
2892e7174d33ffff-17FE1106542131D2LINESTRING (136.88655 37.22834, 136.88672 37.2...{'minx': 136.886546, 'maxx': 136.886733, 'miny...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
3872e71749ffffff-13CDE03A24646A31LINESTRING (136.86781 37.24933, 136.86757 37.2...{'minx': 136.865495, 'maxx': 136.867811, 'miny...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
4872e7174dffffff-13FFCFC79C7F8D41LINESTRING (136.88335 37.23361, 136.88340 37.2...{'minx': 136.8833539, 'maxx': 136.8888252, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
\n", + "

4 rows × 37 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "1 882e7174d3fffff-13FF4449D72183C2 \n", + "2 892e7174d33ffff-17FE1106542131D2 \n", + "3 872e71749ffffff-13CDE03A24646A31 \n", + "4 872e7174dffffff-13FFCFC79C7F8D41 \n", + "\n", + " geometry \\\n", + "1 LINESTRING (136.87957 37.23146, 136.88032 37.2... \n", + "2 LINESTRING (136.88655 37.22834, 136.88672 37.2... \n", + "3 LINESTRING (136.86781 37.24933, 136.86757 37.2... \n", + "4 LINESTRING (136.88335 37.23361, 136.88340 37.2... \n", + "\n", + " bbox subType localityType \\\n", + "1 {'minx': 136.879572, 'maxx': 136.8833539, 'min... road None \n", + "2 {'minx': 136.886546, 'maxx': 136.886733, 'miny... road None \n", + "3 {'minx': 136.865495, 'maxx': 136.867811, 'miny... road None \n", + "4 {'minx': 136.8833539, 'maxx': 136.8888252, 'mi... road None \n", + "\n", + " names contextId adminLevel isoCountryCodeAlpha2 isoSubCountryCode ... \\\n", + "1 None None NaN None None ... \n", + "2 None None NaN None None ... \n", + "3 None None NaN None None ... \n", + "4 None None NaN None None ... \n", + "\n", + " socials emails phones brand addresses sourceTags wikidata surface isSalt \\\n", + "1 None None None None None None None None None \n", + "2 None None None None None None None None None \n", + "3 None None None None None None None None None \n", + "4 None None None None None None None None None \n", + "\n", + " isIntermittent \n", + "1 None \n", + "2 None \n", + "3 None \n", + "4 None \n", + "\n", + "[4 rows x 37 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test = gdf[1:5]\n", + "test" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_parquet.zstd.parquet'\n", + "test.to_parquet(pth,compression='zstd')" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import geopandas as gpd\n", + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_parquet.zstd.parquet'\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ]\n", + "\n", + "sql = f\"\"\"\n", + " \n", + " SELECT *\n", + " FROM read_parquet('{pth}', union_by_name=True)\n", + "\"\"\"\n", + "initialise_duckdb_spatial().execute(sql)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import geopandas as gpd\n", + "\n", + "# Set the path to the 'parquet_data' directory\n", + "pth = r'E:\\theme=transportation\\type=segment'\n", + "pth2 = r'C:\\Users\\penny\\git\\Aequilibrae\\theme=transportation\\type=segment\\transportation_data_files.zstd.parquet'\n", + "\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621]\n", + "\n", + "\n", + "# Loop through all the parquet files in the 'parquet_data' directory\n", + "for file in os.listdir(pth.replace(\"\\\\\", \"/\")):\n", + " if file.endswith('.parquet'):\n", + " # Read the parquet file into a geopandas GeoDataFrame\n", + " gdf = gpd.read_parquet(os.path.join(pth, file))\n", + "\n", + " # Filter the data using the WHERE command\n", + " filtered_gdf = gdf[gdf['bbox'].apply(lambda bbox: 
airlie_bbox[0] <= bbox['minx'] <= airlie_bbox[2] and\n", + " airlie_bbox[0] <= bbox['maxx'] <= airlie_bbox[2] and\n", + " airlie_bbox[1] <= bbox['miny'] <= airlie_bbox[3] and\n", + " airlie_bbox[1] <= bbox['maxy'] <= airlie_bbox[3])]\n", + "\n", + " # Save the filtered data to a new parquet file\n", + " filtered_gdf.to_parquet(pth2, compression='zstd')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'minx': -176.236521,\n", + " 'maxx': -176.236365,\n", + " 'miny': -44.2445032,\n", + " 'maxy': -44.2442786}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# for i in 1:gdf.length:\n", + "gdf['bbox'][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgeometrybboxsubTypelocalityTypenamescontextIdadminLevelisoCountryCodeAlpha2isoSubCountryCode...socialsemailsphonesbrandaddressessourceTagswikidatasurfaceisSaltisIntermittent
08a2e712b282ffff-17DFF24660C0674BLINESTRING (136.87391 37.21506, 136.87418 37.2...{'minx': 136.8739104, 'maxx': 136.8741804, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
1882e7174d3fffff-13FF4449D72183C2LINESTRING (136.87957 37.23146, 136.88032 37.2...{'minx': 136.879572, 'maxx': 136.8833539, 'min...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
2892e7174d33ffff-17FE1106542131D2LINESTRING (136.88655 37.22834, 136.88672 37.2...{'minx': 136.886546, 'maxx': 136.886733, 'miny...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
3872e71749ffffff-13CDE03A24646A31LINESTRING (136.86781 37.24933, 136.86757 37.2...{'minx': 136.865495, 'maxx': 136.867811, 'miny...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
4872e7174dffffff-13FFCFC79C7F8D41LINESTRING (136.88335 37.23361, 136.88340 37.2...{'minx': 136.8833539, 'maxx': 136.8888252, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
..................................................................
887268804ebaf6dfffff-157B56F2364053C0LINESTRING (171.81161 69.73536, 171.81055 69.7...{'minx': 171.8036842, 'maxx': 171.8116128, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
887278804ebaf61fffff-159FD531CC11F67BLINESTRING (171.83673 69.73090, 171.83653 69.7...{'minx': 171.8275881, 'maxx': 171.836729, 'min...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
887288704ebaf6ffffff-15BFEADF6048761DLINESTRING (171.82734 69.72528, 171.82774 69.7...{'minx': 171.8273413, 'maxx': 171.8530772, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
88729860db679fffffff-17EDF07478DAA881LINESTRING (173.78101 68.98210, 173.77863 68.9...{'minx': 173.7595158, 'maxx': 173.7810087, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
88730850db6c7fffffff-17ACF6D5C1797FD5LINESTRING (173.54518 69.69184, 173.54693 69.6...{'minx': 173.5451782, 'maxx': 173.6200343, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
\n", + "

88731 rows × 37 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 8a2e712b282ffff-17DFF24660C0674B \n", + "1 882e7174d3fffff-13FF4449D72183C2 \n", + "2 892e7174d33ffff-17FE1106542131D2 \n", + "3 872e71749ffffff-13CDE03A24646A31 \n", + "4 872e7174dffffff-13FFCFC79C7F8D41 \n", + "... ... \n", + "88726 8804ebaf6dfffff-157B56F2364053C0 \n", + "88727 8804ebaf61fffff-159FD531CC11F67B \n", + "88728 8704ebaf6ffffff-15BFEADF6048761D \n", + "88729 860db679fffffff-17EDF07478DAA881 \n", + "88730 850db6c7fffffff-17ACF6D5C1797FD5 \n", + "\n", + " geometry \\\n", + "0 LINESTRING (136.87391 37.21506, 136.87418 37.2... \n", + "1 LINESTRING (136.87957 37.23146, 136.88032 37.2... \n", + "2 LINESTRING (136.88655 37.22834, 136.88672 37.2... \n", + "3 LINESTRING (136.86781 37.24933, 136.86757 37.2... \n", + "4 LINESTRING (136.88335 37.23361, 136.88340 37.2... \n", + "... ... \n", + "88726 LINESTRING (171.81161 69.73536, 171.81055 69.7... \n", + "88727 LINESTRING (171.83673 69.73090, 171.83653 69.7... \n", + "88728 LINESTRING (171.82734 69.72528, 171.82774 69.7... \n", + "88729 LINESTRING (173.78101 68.98210, 173.77863 68.9... \n", + "88730 LINESTRING (173.54518 69.69184, 173.54693 69.6... \n", + "\n", + " bbox subType localityType \\\n", + "0 {'minx': 136.8739104, 'maxx': 136.8741804, 'mi... road None \n", + "1 {'minx': 136.879572, 'maxx': 136.8833539, 'min... road None \n", + "2 {'minx': 136.886546, 'maxx': 136.886733, 'miny... road None \n", + "3 {'minx': 136.865495, 'maxx': 136.867811, 'miny... road None \n", + "4 {'minx': 136.8833539, 'maxx': 136.8888252, 'mi... road None \n", + "... ... ... ... \n", + "88726 {'minx': 171.8036842, 'maxx': 171.8116128, 'mi... road None \n", + "88727 {'minx': 171.8275881, 'maxx': 171.836729, 'min... road None \n", + "88728 {'minx': 171.8273413, 'maxx': 171.8530772, 'mi... road None \n", + "88729 {'minx': 173.7595158, 'maxx': 173.7810087, 'mi... road None \n", + "88730 {'minx': 173.5451782, 'maxx': 173.6200343, 'mi... road None \n", + "\n", + " names contextId adminLevel isoCountryCodeAlpha2 isoSubCountryCode ... \\\n", + "0 None None NaN None None ... \n", + "1 None None NaN None None ... \n", + "2 None None NaN None None ... \n", + "3 None None NaN None None ... \n", + "4 None None NaN None None ... \n", + "... ... ... ... ... ... ... \n", + "88726 None None NaN None None ... \n", + "88727 None None NaN None None ... \n", + "88728 None None NaN None None ... \n", + "88729 None None NaN None None ... \n", + "88730 None None NaN None None ... \n", + "\n", + " socials emails phones brand addresses sourceTags wikidata surface \\\n", + "0 None None None None None None None None \n", + "1 None None None None None None None None \n", + "2 None None None None None None None None \n", + "3 None None None None None None None None \n", + "4 None None None None None None None None \n", + "... ... ... ... ... ... ... ... ... \n", + "88726 None None None None None None None None \n", + "88727 None None None None None None None None \n", + "88728 None None None None None None None None \n", + "88729 None None None None None None None None \n", + "88730 None None None None None None None None \n", + "\n", + " isSalt isIntermittent \n", + "0 None None \n", + "1 None None \n", + "2 None None \n", + "3 None None \n", + "4 None None \n", + "... ... ... 
\n", + "88726 None None \n", + "88727 None None \n", + "88728 None None \n", + "88729 None None \n", + "88730 None None \n", + "\n", + "[88731 rows x 37 columns]" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "gdf = gpd.read_parquet(pth)\n", + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty GeoDataFrame\n", + "Columns: [id, geometry, bbox, subType, localityType, names, contextId, adminLevel, isoCountryCodeAlpha2, isoSubCountryCode, defaultLanguage, drivingSide, version, updateTime, sources, isMaritime, geopolDisplay, localityId, height, numFloors, class, level, connectors, road, categories, confidence, websites, socials, emails, phones, brand, addresses, sourceTags, wikidata, surface, isSalt, isIntermittent]\n", + "Index: []\n", + "\n", + "[0 rows x 37 columns]\n" + ] + } + ], + "source": [ + "# Define the bounding box for the query\n", + "airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621]\n", + "\n", + "# Query for geometries whose bounding boxes intersect with the airlie_bbox\n", + "result = gdf[gdf['geometry'].apply(lambda geom: geom.intersects(airlie_bbox))]\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty GeoDataFrame\n", + "Columns: [id, geometry, bbox, subType, localityType, names, contextId, adminLevel, isoCountryCodeAlpha2, isoSubCountryCode, defaultLanguage, drivingSide, version, updateTime, sources, isMaritime, geopolDisplay, localityId, height, numFloors, class, level, connectors, road, categories, confidence, websites, socials, emails, phones, brand, addresses, sourceTags, wikidata, surface, isSalt, isIntermittent]\n", + "Index: []\n", + "\n", + "[0 rows x 37 columns]\n" + ] + } + ], + "source": [ + "from shapely.geometry import box\n", + "\n", + "# Define the bounding box for the query\n", + "airlie_bbox = box(148.7077, -20.2780, 148.7324, -20.2621)\n", + "\n", + "# Convert the bounding box coordinates to a Shapely box geometry\n", + "query_bbox = box(*airlie_bbox.bounds)\n", + "\n", + "# Check if the bounding box in each row intersects with the query_bbox\n", + "result = gdf[gdf['bbox'].apply(lambda bbox: box(bbox['minx'], bbox['miny'], bbox['maxx'], bbox['maxy']).intersects(query_bbox))]\n", + "\n", + "# Print the resulting GeoDataFrame\n", + "print(result)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/aequilibrae/project/network/more downloads.ipynb b/aequilibrae/project/network/more downloads.ipynb new file mode 100644 index 000000000..68780c8b8 --- /dev/null +++ b/aequilibrae/project/network/more downloads.ipynb @@ -0,0 +1,867 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": 
[], + "source": [ + "import duckdb\n", + "import json\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from shapely.geometry import LineString, LinearRing\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def initialise_duckdb_spatial():\n", + " conn = duckdb.connect()\n", + " c = conn.cursor()\n", + "\n", + " c.execute(\n", + " \"\"\"INSTALL spatial; \n", + " INSTALL httpfs;\n", + " INSTALL parquet;\n", + " \"\"\"\n", + " )\n", + " c.execute(\n", + " \"\"\"LOAD spatial;\n", + " LOAD parquet;\n", + " SET s3_region='us-west-2';\n", + " \"\"\"\n", + " )\n", + " return c" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgeometrybboxsubTypelocalityTypenamescontextIdadminLevelisoCountryCodeAlpha2isoSubCountryCode...socialsemailsphonesbrandaddressessourceTagswikidatasurfaceisSaltisIntermittent
08a2e712b282ffff-17DFF24660C0674BLINESTRING (136.87391 37.21506, 136.87418 37.2...{'minx': 136.8739104, 'maxx': 136.8741804, 'mi...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
1882e7174d3fffff-13FF4449D72183C2LINESTRING (136.87957 37.23146, 136.88032 37.2...{'minx': 136.879572, 'maxx': 136.8833539, 'min...roadNoneNoneNoneNaNNoneNone...NoneNoneNoneNoneNoneNoneNoneNoneNoneNone
\n", + "

2 rows × 37 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 8a2e712b282ffff-17DFF24660C0674B \n", + "1 882e7174d3fffff-13FF4449D72183C2 \n", + "\n", + " geometry \\\n", + "0 LINESTRING (136.87391 37.21506, 136.87418 37.2... \n", + "1 LINESTRING (136.87957 37.23146, 136.88032 37.2... \n", + "\n", + " bbox subType localityType \\\n", + "0 {'minx': 136.8739104, 'maxx': 136.8741804, 'mi... road None \n", + "1 {'minx': 136.879572, 'maxx': 136.8833539, 'min... road None \n", + "\n", + " names contextId adminLevel isoCountryCodeAlpha2 isoSubCountryCode ... \\\n", + "0 None None NaN None None ... \n", + "1 None None NaN None None ... \n", + "\n", + " socials emails phones brand addresses sourceTags wikidata surface isSalt \\\n", + "0 None None None None None None None None None \n", + "1 None None None None None None None None None \n", + "\n", + " isIntermittent \n", + "0 None \n", + "1 None \n", + "\n", + "[2 rows x 37 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'E:\\theme=transportation\\type=segment\\part-00232-8d133ca6-6cbd-48b8-87d5-a0850a2ba489.c003.zstd.parquet'\n", + "gdf = gpd.read_parquet(pth)\n", + "\n", + "gdf[0:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.float64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pth = r'C:\\Users\\penny\\git\\Aequilibrae\\tests\\data\\overture\\theme=transportation\\type=segment\\transportation_data_segment.parquet'\n", + "df = pd.read_parquet(pth)\n", + "geo = gpd.GeoSeries.from_wkb(df.geometry, crs=4326)\n", + "gdf = gpd.GeoDataFrame(df,geometry=geo)\n", + "gdf['speed'] = gdf['speed'].apply(lambda x: json.loads(x)[0] if x else None)\n", + "\n", + "gdf.to_parquet(r'C:\\Users\\penny\\git\\Aequilibrae\\tests\\data\\overture\\theme=transportation\\type=segment\\transportation_data_segment_airlie_beach.parquet')\n", + "type(gdf['speed'][1])" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 70.0\n", + "2 NaN\n", + "3 50.0\n", + "4 NaN\n", + " ... \n", + "276 NaN\n", + "277 NaN\n", + "278 40.0\n", + "279 50.0\n", + "280 70.0\n", + "Name: speed, Length: 281, dtype: float64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# gdf['speed'] = \n", + "gdf['speed'].apply(lambda x: json.loads(x)[0] if x else None)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idbboxa_nodeb_nodelink_typespeedgeometry
08b9d0e12872dfff-167FE27DAE6F0272{'maxx': 148.7165748, 'maxy': -20.2730078, 'mi...8f9d0e12872d085-167FE2B34902B1488f9d0e12872d292-167EE24C8AA265F0secondaryNaNLINESTRING (148.71657 -20.27307, 148.71651 -20...
18b9d0e128cd9fff-163FF6797FC40661{'maxx': 148.7247078, 'maxy': -20.2747175, 'mi...8f9d0e128cd9709-167FF64A37F1BFFB8f9d0e128cd98d6-15FFF68E65613FDFsecondary70.0LINESTRING (148.72460 -20.27472, 148.72465 -20...
28a9d0e1284effff-17BFEE367410456E{'maxx': 148.7216872, 'maxy': -20.269086, 'min...8f9d0e1284ec01d-17BFFF2E889165D38f9d0e1284ec0eb-17BFEF097DAAE5D1unknownNaNLINESTRING (148.72169 -20.26929, 148.72163 -20...
3889d0e1287fffff-147FF893549FBF23{'maxx': 148.7193104, 'maxy': -20.270149, 'min...8f9d0e1287a434a-157EE961030FB9708f9d0e12871415a-173FE7D6F5926937secondary50.0LINESTRING (148.71931 -20.27015, 148.71924 -20...
4869d0e12fffffff-167FF1BC7BC46279{'maxx': 148.7228411, 'maxy': -20.2662583, 'mi...8f9d0e12ab2d0b3-16FEF0F6D47F3E2F8f9d0e1284d3910-157EF1FC540A0515unknownNaNLINESTRING (148.72242 -20.26626, 148.72276 -20...
........................
276899d0e12867ffff-15BEF645DFC7D25C{'maxx': 148.7122258, 'maxy': -20.269956, 'min...8f9d0e128646b30-15BFF8152AF1551F8f9d0e12866a2c9-14FFD4B13E67A6AFunknownNaNLINESTRING (148.71223 -20.27006, 148.71161 -20...
277889d0e1285fffff-177EEF26B077B75F{'maxx': 148.7218181, 'maxy': -20.268822, 'min...8f9d0e1284ee0c5-14BEEF805C9D97A78f9d0e1284eeb00-143FEF7119103FFDresidentialNaNLINESTRING (148.72182 -20.26882, 148.72181 -20...
278869d0e12fffffff-15FFD939CAC49013{'maxx': 148.7132732, 'maxy': -20.2644693, 'mi...8f9d0e12bd2bd89-177EFA66EE6BBDE58f9d0e12bd2869c-173FDA8ACA23C819residential40.0LINESTRING (148.71318 -20.26447, 148.71323 -20...
279869d0e12fffffff-14FEFD6B3C7AEF26{'maxx': 148.7145647, 'maxy': -20.2646887, 'mi...8f9d0e12bd22a6a-16FFDDCAF35E76318f9d0e1286d694d-177FFD1D35C31328residential50.0LINESTRING (148.71456 -20.26469, 148.71429 -20...
280889d0e1285fffff-163EF5CEB5006F3F{'maxx': 148.7245302, 'maxy': -20.2746973, 'mi...8f9d0e12856450c-163EF592716BEB4D8f9d0e128cd9662-167EF61F6866D214secondary70.0LINESTRING (148.72430 -20.27484, 148.72433 -20...
\n", + "

281 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 8b9d0e12872dfff-167FE27DAE6F0272 \n", + "1 8b9d0e128cd9fff-163FF6797FC40661 \n", + "2 8a9d0e1284effff-17BFEE367410456E \n", + "3 889d0e1287fffff-147FF893549FBF23 \n", + "4 869d0e12fffffff-167FF1BC7BC46279 \n", + ".. ... \n", + "276 899d0e12867ffff-15BEF645DFC7D25C \n", + "277 889d0e1285fffff-177EEF26B077B75F \n", + "278 869d0e12fffffff-15FFD939CAC49013 \n", + "279 869d0e12fffffff-14FEFD6B3C7AEF26 \n", + "280 889d0e1285fffff-163EF5CEB5006F3F \n", + "\n", + " bbox \\\n", + "0 {'maxx': 148.7165748, 'maxy': -20.2730078, 'mi... \n", + "1 {'maxx': 148.7247078, 'maxy': -20.2747175, 'mi... \n", + "2 {'maxx': 148.7216872, 'maxy': -20.269086, 'min... \n", + "3 {'maxx': 148.7193104, 'maxy': -20.270149, 'min... \n", + "4 {'maxx': 148.7228411, 'maxy': -20.2662583, 'mi... \n", + ".. ... \n", + "276 {'maxx': 148.7122258, 'maxy': -20.269956, 'min... \n", + "277 {'maxx': 148.7218181, 'maxy': -20.268822, 'min... \n", + "278 {'maxx': 148.7132732, 'maxy': -20.2644693, 'mi... \n", + "279 {'maxx': 148.7145647, 'maxy': -20.2646887, 'mi... \n", + "280 {'maxx': 148.7245302, 'maxy': -20.2746973, 'mi... \n", + "\n", + " a_node b_node \\\n", + "0 8f9d0e12872d085-167FE2B34902B148 8f9d0e12872d292-167EE24C8AA265F0 \n", + "1 8f9d0e128cd9709-167FF64A37F1BFFB 8f9d0e128cd98d6-15FFF68E65613FDF \n", + "2 8f9d0e1284ec01d-17BFFF2E889165D3 8f9d0e1284ec0eb-17BFEF097DAAE5D1 \n", + "3 8f9d0e1287a434a-157EE961030FB970 8f9d0e12871415a-173FE7D6F5926937 \n", + "4 8f9d0e12ab2d0b3-16FEF0F6D47F3E2F 8f9d0e1284d3910-157EF1FC540A0515 \n", + ".. ... ... \n", + "276 8f9d0e128646b30-15BFF8152AF1551F 8f9d0e12866a2c9-14FFD4B13E67A6AF \n", + "277 8f9d0e1284ee0c5-14BEEF805C9D97A7 8f9d0e1284eeb00-143FEF7119103FFD \n", + "278 8f9d0e12bd2bd89-177EFA66EE6BBDE5 8f9d0e12bd2869c-173FDA8ACA23C819 \n", + "279 8f9d0e12bd22a6a-16FFDDCAF35E7631 8f9d0e1286d694d-177FFD1D35C31328 \n", + "280 8f9d0e12856450c-163EF592716BEB4D 8f9d0e128cd9662-167EF61F6866D214 \n", + "\n", + " link_type speed geometry \n", + "0 secondary NaN LINESTRING (148.71657 -20.27307, 148.71651 -20... \n", + "1 secondary 70.0 LINESTRING (148.72460 -20.27472, 148.72465 -20... \n", + "2 unknown NaN LINESTRING (148.72169 -20.26929, 148.72163 -20... \n", + "3 secondary 50.0 LINESTRING (148.71931 -20.27015, 148.71924 -20... \n", + "4 unknown NaN LINESTRING (148.72242 -20.26626, 148.72276 -20... \n", + ".. ... ... ... \n", + "276 unknown NaN LINESTRING (148.71223 -20.27006, 148.71161 -20... \n", + "277 residential NaN LINESTRING (148.72182 -20.26882, 148.72181 -20... \n", + "278 residential 40.0 LINESTRING (148.71318 -20.26447, 148.71323 -20... \n", + "279 residential 50.0 LINESTRING (148.71456 -20.26469, 148.71429 -20... \n", + "280 secondary 70.0 LINESTRING (148.72430 -20.27484, 148.72433 -20... 
\n", + "\n", + "[281 rows x 7 columns]" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LINESTRING (0 0, 1 1)\n", + "LINESTRING (1 1, 2 2)\n", + "LINESTRING (0 0, 1 0)\n", + "LINESTRING (1 0, 1 1)\n", + "LINESTRING (1 1, 0 1)\n", + "LINESTRING (0 1, 0 0)\n" + ] + } + ], + "source": [ + "from shapely.geometry import LineString, LinearRing\n", + "\n", + "\n", + "def segments(curve):\n", + " return list(map(LineString, zip(curve.coords[:-1], curve.coords[1:])))\n", + "\n", + "\n", + "line = LineString([(0, 0), (1, 1), (2, 2)])\n", + "ring = LinearRing([(0, 0), (1, 0), (1, 1), (0, 1)])\n", + "\n", + "line_segments = segments(line)\n", + "for segment in line_segments:\n", + " print(segment)\n", + "# LINESTRING (0 0, 1 1)\n", + "# LINESTRING (1 1, 2 2)\n", + "\n", + "ring_segments = segments(ring)\n", + "for segment in ring_segments:\n", + " print(segment)\n", + "# LINESTRING (0 0, 1 0)\n", + "# LINESTRING (1 0, 1 1)\n", + "# LINESTRING (1 1, 0 1)\n", + "# LINESTRING (0 1, 0 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'int' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[6], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mLineString\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m148\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m20\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\penny\\git\\Aequilibrae\\.venv\\Lib\\site-packages\\shapely\\geometry\\linestring.py:66\u001b[0m, in \u001b[0;36mLineString.__new__\u001b[1;34m(self, coordinates)\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 64\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28mfloat\u001b[39m(c) \u001b[38;5;28;01mfor\u001b[39;00m c \u001b[38;5;129;01min\u001b[39;00m o]\n\u001b[1;32m---> 66\u001b[0m coordinates \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43m_coords\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mo\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcoordinates\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 68\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(coordinates) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 69\u001b[0m \u001b[38;5;66;03m# empty geometry\u001b[39;00m\n\u001b[0;32m 70\u001b[0m \u001b[38;5;66;03m# TODO better constructor + should shapely.linestrings handle this?\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m shapely\u001b[38;5;241m.\u001b[39mfrom_wkt(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLINESTRING EMPTY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mTypeError\u001b[0m: 'int' object is not iterable" + ] + } + ], + "source": [ + "LineString(148 -20)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + " Name Age City\n", + "0 Alice 50 NEW YORK\n", + "1 Bob 60 SAN FRANCISCO\n", + "2 Charlie 70 LOS ANGELES\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "# Example DataFrame\n", + "data = {'Name': ['Alice', 'Bob', 'Charlie'],\n", + " 'Age': [25, 30, 35],\n", + " 'City': ['New York', 'San Francisco', 'Los Angeles']}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Function to process each row and create a new DataFrame\n", + "def process_row(row):\n", + " name = row['Name']\n", + " age = row['Age']\n", + " city = row['City']\n", + "\n", + " # Your processing logic here\n", + " # For example, create a new DataFrame with processed data\n", + " processed_data = {'Name': [name], 'Age': [age * 2], 'City': [city.upper()]}\n", + " processed_df = pd.DataFrame(processed_data)\n", + "\n", + " return processed_df\n", + "\n", + "# Iterate over rows using iterrows()\n", + "result_dfs = []\n", + "for index, row in df.iterrows():\n", + " processed_df = process_row(row)\n", + " result_dfs.append(processed_df)\n", + "\n", + "# Concatenate the resulting DataFrames into a final DataFrame\n", + "final_result = pd.concat(result_dfs, ignore_index=True)\n", + "\n", + "# Display the final result\n", + "print(final_result)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeCity
0Alice25New York
1Bob30San Francisco
2Charlie35Los Angeles
\n", + "
" + ], + "text/plain": [ + " Name Age City\n", + "0 Alice 25 New York\n", + "1 Bob 30 San Francisco\n", + "2 Charlie 35 Los Angeles" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeCity
0Alice50NEW YORK
1Bob60SAN FRANCISCO
2Charlie70LOS ANGELES
\n", + "
" + ], + "text/plain": [ + " Name Age City\n", + "0 Alice 50 NEW YORK\n", + "1 Bob 60 SAN FRANCISCO\n", + "2 Charlie 70 LOS ANGELES" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_result" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameAgeCity
0Bob60SAN FRANCISCO
\n", + "
" + ], + "text/plain": [ + " Name Age City\n", + "0 Bob 60 SAN FRANCISCO" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_dfs[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Name Age City a_node b_node\n", + "0 Alice 25 New York 1 2\n", + "1 Alice 25 New York 2 3\n", + "2 Bob 30 San Francisco 4 5\n", + "3 Bob 30 San Francisco 5 6\n", + "4 Charlie 35 Los Angeles 7 8\n", + "5 Charlie 35 Los Angeles 8 9\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Example DataFrame\n", + "data = {'Name': ['Alice', 'Bob', 'Charlie'],\n", + " 'Age': [25, 30, 35],\n", + " 'City': ['New York', 'San Francisco', 'Los Angeles'],\n", + " 'Connectors': [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "# Function to process each row and create a new DataFrame\n", + "def process_row(row):\n", + " name = row['Name']\n", + " age = row['Age']\n", + " city = row['City']\n", + " connectors = row['Connectors']\n", + "\n", + " # Check if 'Connectors' has more than 2 elements\n", + " if np.size(connectors) > 2:\n", + " # Split the DataFrame into multiple rows\n", + " rows = []\n", + " for i in range(len(connectors) - 1):\n", + " new_row = {'Name': name, 'Age': age, 'City': city, 'a_node': connectors[i], 'b_node': connectors[i + 1]}\n", + " rows.append(new_row)\n", + " processed_df = pd.DataFrame(rows)\n", + " else:\n", + " # For cases where 'Connectors' has 2 or fewer elements\n", + " processed_df = pd.DataFrame({'Name': [name], 'Age': [age], 'City': [city], 'a_node': connectors[0], 'b_node': connectors[-1]})\n", + "\n", + " return processed_df\n", + "\n", + "# Iterate over rows using iterrows()\n", + "result_dfs = []\n", + "for index, row in df.iterrows():\n", + " processed_df = process_row(row)\n", + " result_dfs.append(processed_df)\n", + "\n", + "# Concatenate the resulting DataFrames into a final DataFrame\n", + "final_result = pd.concat(result_dfs, ignore_index=True)\n", + "\n", + "# Display the final result\n", + "print(final_result)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/aequilibrae/project/network/network.py b/aequilibrae/project/network/network.py index 094139545..b4532e079 100644 --- a/aequilibrae/project/network/network.py +++ b/aequilibrae/project/network/network.py @@ -2,6 +2,7 @@ import math from sqlite3 import Connection as sqlc from typing import Dict +from pathlib import Path import numpy as np import pandas as pd @@ -13,6 +14,8 @@ from aequilibrae.context import get_logger from aequilibrae.parameters import Parameters from aequilibrae.project.network import OSMDownloader +from aequilibrae.project.network.ovm_builder import OVMBuilder +from aequilibrae.project.network.ovm_downloader import OVMDownloader from aequilibrae.project.network.gmns_builder import GMNSBuilder from aequilibrae.project.network.gmns_exporter import GMNSExporter from 
aequilibrae.project.network.haversine import haversine
@@ -21,6 +24,7 @@
 from aequilibrae.project.network.modes import Modes
 from aequilibrae.project.network.nodes import Nodes
 from aequilibrae.project.network.osm_builder import OSMBuilder
+# from aequilibrae.project.network.ovm_builder import OVMBuilder
 from aequilibrae.project.network.osm_utils.place_getter import placegetter
 from aequilibrae.project.project_creation import req_link_flds, req_node_flds, protected_fields
 from aequilibrae.utils import WorkerThread
@@ -119,6 +123,94 @@ def list_modes(self):
         curr.execute("""select mode_id from modes""")
         return [x[0] for x in curr.fetchall()]
 
+    def create_from_ovm(
+        self,
+        west: float = None,
+        south: float = None,
+        east: float = None,
+        north: float = None,
+        place_name: str = None,
+        data_source: Path = None,
+        output_dir: Path = None,
+        modes=["car", "transit", "bicycle", "walk"],
+    ) -> None:
+        """
+        Downloads the network from Overture Maps
+
+        :Arguments:
+            **west** (:obj:`float`, Optional): West most coordinate of the download bounding box
+
+            **south** (:obj:`float`, Optional): South most coordinate of the download bounding box
+
+            **east** (:obj:`float`, Optional): East most coordinate of the download bounding box
+
+            **north** (:obj:`float`, Optional): North most coordinate of the download bounding box
+
+            **place_name** (:obj:`str`, Optional): If not downloading with an East-West-North-South bounding box,
+            this is required
+
+            **data_source** (:obj:`Path`, Optional): Path to the Overture Maps transportation theme data
+            (a local copy or the S3 bucket)
+
+            **output_dir** (:obj:`Path`, Optional): Folder where the filtered segment and connector parquet
+            files are written
+
+            **modes** (:obj:`list`, Optional): List of all modes to be downloaded. Defaults to the modes in the
+            parameter file
+
+        .. code-block:: python
+
+            >>> from aequilibrae import Project
+
+            >>> p = Project()
+            >>> p.new("/tmp/new_project")
+
+            # Now we can import the network for any place we want
+            # p.network.create_from_ovm(place_name="my_beautiful_hometown")
+
+            >>> p.close()
+        """
+
+        if self.count_links() > 0:
+            raise FileExistsError("You can only import an OVM network into a brand new model file")
+
+        curr = self.conn.cursor()
+        curr.execute("""ALTER TABLE links ADD COLUMN ovm_id integer""")
+        curr.execute("""ALTER TABLE nodes ADD COLUMN ovm_id integer""")
+        self.conn.commit()
+
+        if isinstance(modes, (tuple, list)):
+            modes = list(modes)
+        elif isinstance(modes, str):
+            modes = [modes]
+        else:
+            raise ValueError("'modes' needs to be string or list/tuple of string")
+
+        if place_name is None:
+            if min(east, west) < -180 or max(east, west) > 180 or min(north, south) < -90 or max(north, south) > 90:
+                raise ValueError("Coordinates out of bounds")
+            bbox = [west, south, east, north]
+        else:
+            bbox, report = placegetter(place_name)
+            if bbox is None:
+                msg = f'We could not find a reference for place name "{place_name}"'
+                self.logger.warning(msg)
+                return
+            west, south, east, north = bbox
+            for i in report:
+                if "PLACE FOUND" in i:
+                    self.logger.info(i)
+
+        self.logger.info("Downloading data")
+        self.downloader = OVMDownloader(modes, self.source, logger=self.logger)
+        if pyqt:
+            self.downloader.downloading.connect(self.signal_handler)
+        segments_gdf, connectors_gdf = self.downloader.downloadTransportation(bbox, data_source, output_dir)
+
+        self.logger.info("Building Network")
+        self.builder = OVMBuilder(segments_gdf, connectors_gdf, self.source, project=self.project)
+
+        if pyqt:
+            self.builder.building.connect(self.signal_handler)
+
+        self.builder.doWork(output_dir)
+        self.logger.info("Network built successfully")
+
     def create_from_osm(
         self,
         west: float = None,
diff --git a/aequilibrae/project/network/ovm_builder.py b/aequilibrae/project/network/ovm_builder.py
new file mode 100644
index
000000000..45b1aa31d --- /dev/null +++ b/aequilibrae/project/network/ovm_builder.py @@ -0,0 +1,437 @@ +import json +import importlib.util as iutil +import sqlite3 +import logging +from pathlib import Path +import string + +from aequilibrae.context import get_active_project +from aequilibrae.parameters import Parameters +from aequilibrae.project.network.link_types import LinkTypes +from aequilibrae.context import get_logger +import importlib.util as iutil +from aequilibrae.utils.spatialite_utils import connect_spatialite +from aequilibrae.project.network.haversine import haversine +from aequilibrae.utils import WorkerThread + +# from .haversine import haversine +# from ...utils import WorkerThread + +import duckdb +import shapely +import geopandas as gpd +import pandas as pd +import numpy as np +from typing import Union +from shapely.geometry import LineString, Point + +spec = iutil.find_spec("PyQt5") +pyqt = spec is not None +if pyqt: + from PyQt5.QtCore import pyqtSignal + +spec = iutil.find_spec("qgis") +isqgis = spec is not None +if isqgis: + import qgis + + +class OVMBuilder(WorkerThread): + if pyqt: + building = pyqtSignal(object) + + def __init__( + self, + gdf_segments: gpd.GeoDataFrame, + gdf_connectors: gpd.GeoDataFrame, + project_path: Union[str, Path], + logger: logging.Logger = None, + node_start=10000, + project=None, + ) -> None: + WorkerThread.__init__(self, None) + self.project = project or get_active_project() + self.logger = logger or get_logger() + self.node_start = node_start + self.report = [] + self.conn = None + self.GeoDataFrame = [] + self.nodes = {} + self.node_ids = {} + self.links_gdf = gdf_segments + self.nodes_gdf = gdf_connectors + self.__link_types = None # type: LinkTypes + self.__model_link_types = [] + self.__model_link_type_ids = [] + self.__link_type_quick_reference = {} + self.__project_path = Path(project_path) + self.pth = str(self.__project_path).replace("\\", "/") + + def __emit_all(self, *args): + if pyqt: + self.building.emit(*args) + + def doWork(self, output_dir: Path): + self.conn = connect_spatialite(self.pth) + self.curr = self.conn.cursor() + self._worksetup() + self.formatting(self.links_gdf, self.nodes_gdf, output_dir) + self.__emit_all(["finished_threaded_procedure", 0]) + + def formatting(self, links_gdf: gpd.GeoDataFrame, nodes_gdf: gpd.GeoDataFrame, output_dir: Path): + output_dir = Path(output_dir) + output_file_link = output_dir / f"type=segment" / f"transportation_data_segment.parquet" + output_file_node = output_dir / f"type=connector" / f"transportation_data_connector.parquet" + + links_gdf = links_gdf.copy() + links_gdf["name"] = links_gdf["name"].apply(lambda x: json.loads(x)[0]["value"] if x else None) + + nodes_gdf = nodes_gdf.copy() + nodes_gdf["node_id"] = self.create_node_ids(nodes_gdf) + nodes_gdf["ogc_fid"] = pd.Series(list(range(1, len(nodes_gdf) + 1))) + nodes_gdf["is_centroid"] = 0 + + # Iterate over rows using iterrows() + result_dfs = [self.split_connectors(row) for _, row in links_gdf.iterrows()] + + # Concatenate the resulting DataFrames into a final GeoDataFrame + links_gdf = pd.concat((df.dropna(axis=1, how="all") for df in result_dfs), ignore_index=True) + + # adding neccassary columns for aequilibrea data frame + links_gdf["link_id"] = pd.Series(list(range(1, len(links_gdf) + 1))) + links_gdf["ogc_fid"] = pd.Series(list(range(1, len(links_gdf) + 1))) + links_gdf["geometry"] = [ + self.trim_geometry(self.node_ids, row) + for e, row in links_gdf[["a_node", "b_node", "geometry"]].iterrows() + ] + + distance_list 
= [] + for i in range(0, len(links_gdf)): + distance = sum( + [ + haversine(x[0], x[1], y[0], y[1]) + for x, y in zip( + list(links_gdf["geometry"][i].coords)[1:], list(links_gdf["geometry"][i].coords)[:-1] + ) + ] + ) + distance_list.append(distance) + links_gdf["distance"] = distance_list + + mode_codes, not_found_tags = self.modes_per_link_type() + links_gdf["modes"] = links_gdf["link_type"].apply(lambda x: mode_codes.get(x, not_found_tags)) + + common_nodes = links_gdf["a_node"].isin(nodes_gdf["node_id"]) + + # Check if any common nodes exist + if common_nodes.any(): + # If common node exist, retrieve the DataFrame of matched rows using boolean indexing + matched_rows = links_gdf[common_nodes] + + # Create the 'link_types' and 'modes' columns for the 'nodes_gdf' DataFrame + nodes_gdf["link_types"] = matched_rows["link_type"] + nodes_gdf["modes"] = matched_rows["modes"] + else: + # No common nodes found + raise ValueError("No common nodes.") + fields = self.get_link_fields() + link_order = fields.copy() + ["geometry"] + + for element in link_order: + if element not in links_gdf: + links_gdf[element] = None + + links_gdf = links_gdf[link_order] + links_gdf.to_parquet(output_file_link) + + # For goemetry to work in the sql + links_gdf = pd.DataFrame(links_gdf) + links_gdf["geometry"] = links_gdf["geometry"].apply(lambda x: x.wkb) + + node_order = ["ogc_fid", "node_id", "is_centroid", "modes", "link_types", "ovm_id", "geometry"] + nodes_gdf = nodes_gdf[node_order] + + nodes_gdf.to_parquet(output_file_node) + + self.__update_table_structure() + field_names = ",".join(fields) + + self.logger.info("Adding network nodes") + self.__emit_all(["text", "Adding network nodes"]) + + node_df = pd.DataFrame(nodes_gdf[["node_id", "is_centroid", "modes", "link_types", "ovm_id"]]) # drop geom and ogc_fid + node_df['x'] = nodes_gdf.geometry.apply(lambda x: x.coords[0][0]) + node_df['y'] = nodes_gdf.geometry.apply(lambda x: x.coords[0][1]) + node_records = node_df.drop_duplicates(subset=['x', 'y']).to_records(index=False) + + sql = "insert into nodes(node_id, is_centroid, modes, link_types, ovm_id, geometry) Values(?, ?, ?, ?, ?, MakePoint(?,?, 4326))" + self.conn.executemany(sql, node_records) + self.conn.commit() + del nodes_gdf + + all_attrs = links_gdf.values.tolist() + + + insert_qry = """INSERT INTO "links" ({}, geometry) VALUES({}, GeomFromWKB(?, 4326))""" + sql = insert_qry.format(field_names, ",".join(["?"] * (len(link_order) - 1))) + self.logger.info("Adding network links") + self.__emit_all(["text", "Adding network links"]) + try: + self.curr.executemany(sql, all_attrs) + except Exception as e: + self.logger.error("error when inserting link {}. 
Error {}".format(all_attrs[0], e.args)) + self.logger.error(sql) + raise e + + self.conn.commit() + del links_gdf + self.curr.close() + + def _worksetup(self): + self.__link_types = self.project.network.link_types + lts = self.__link_types.all_types() + for lt_id, lt in lts.items(): + self.__model_link_types.append(lt.link_type) + self.__model_link_type_ids.append(lt_id) + + def __repair_link_type(self, link_type: str) -> str: + original_link_type = link_type + link_type = "".join([x for x in link_type if x in string.ascii_letters + "_"]).lower() + split = link_type.split("_") + for i, piece in enumerate(split[1:]): + if piece in ["link", "segment", "stretch"]: + link_type = "_".join(split[0 : i + 1]) + + if len(link_type) == 0: + link_type = "empty" + + if len(self.__model_link_type_ids) >= 51 and link_type not in self.__model_link_types: + link_type = "aggregate_link_type" + + if link_type in self.__model_link_types: + lt = self.__link_types.get_by_name(link_type) + if original_link_type not in lt.description: + lt.description += f", {original_link_type}" + lt.save() + self.__link_type_quick_reference[original_link_type.lower()] = link_type + return link_type + + letter = link_type[0] + if letter in self.__model_link_type_ids: + letter = letter.upper() + if letter in self.__model_link_type_ids: + for letter in string.ascii_letters: + if letter not in self.__model_link_type_ids: + break + letter + lt = self.__link_types.new(letter) + lt.link_type = link_type + lt.description = f"Link types from Overture Maps: {original_link_type}" + lt.save() + self.__model_link_types.append(link_type) + self.__model_link_type_ids.append(letter) + self.__link_type_quick_reference[original_link_type.lower()] = link_type + return link_type + + def create_node_ids(self, data_frame: gpd.GeoDataFrame) -> pd.Series: + """ + Creates node_ids as well as the self.nodes and self.node_ids dictories + """ + node_ids = [] + data_frame["node_id"] = 1 + for i in range(len(data_frame)): + node_count = i + self.node_start + node_ids.append(node_count) + self.node_ids[node_count] = { + "ovm_id": data_frame["ovm_id"][i], + "lat": data_frame["geometry"][i].y, + "lon": data_frame["geometry"][i].x, + "coord": (data_frame["geometry"][i].x, data_frame["geometry"][i].y), + } + self.nodes[data_frame["ovm_id"][i]] = { + "lat": data_frame["geometry"][i].y, + "lon": data_frame["geometry"][i].x, + "coord": (data_frame["geometry"][i].x, data_frame["geometry"][i].y), + "node_id": node_count, + } + data_frame["node_id"] = pd.Series(node_ids) + return data_frame["node_id"] + + def modes_per_link_type(self): + p = Parameters(self.project) + modes = p.parameters["network"]["ovm"]["modes"] + result = [(key, key[0]) for key in modes.keys()] + mode_codes = {p[0]: p[1] for p in result} + type_list = {} + notfound = "" + for mode, val in modes.items(): + all_types = val["link_types"] + md = mode_codes[mode] + for tp in all_types: + type_list[tp] = "{}{}".format(type_list.get(tp, ""), md) + if val["unknown_tags"]: + notfound += md + + type_list = {k: "".join(set(v)) for k, v in type_list.items()} + return type_list, "{}".format(notfound) + + def trim_geometry(self, node_lu: dict, row: dict) -> shapely.LineString: + lat_long_a = node_lu[row["a_node"]]["coord"] + lat_long_b = node_lu[row["b_node"]]["coord"] + start, end = -1, -1 + for j, coord in enumerate(row.geometry.coords): + if lat_long_a == coord: + start = j + if lat_long_b == coord: + end = j + if start < 0 or end < 0: + raise RuntimeError("Couldn't find the start end coords in the given 
linestring")
+        return shapely.LineString(row.geometry.coords[start : end + 1])
+
+    # Function to process each row and create a new GeoDataFrame
+    def split_connectors(self, row: dict) -> gpd.GeoDataFrame:
+        # Extract necessary information from the row
+        connectors = row["connectors"]
+
+        direction_dictionary = self.get_direction(row["direction"])
+        # Check that the segment references at least 2 connectors
+        if np.size(connectors) >= 2:
+            # Split the segment into one row (link) per consecutive pair of connectors
+            rows = []
+
+            for i in range(len(connectors) - 1):
+                new_row = {
+                    "a_node": self.nodes[connectors[i]]["node_id"],
+                    "b_node": self.nodes[connectors[i + 1]]["node_id"],
+                    "direction": direction_dictionary["direction"],
+                    "link_type": self.__link_type_quick_reference.get(
+                        row["link_type"].lower(), self.__repair_link_type(row["link_type"])
+                    ),
+                    "name": row["name"],
+                    "speed_ab": self.get_speed(row["speed"]),
+                    "ovm_id": row["ovm_id"],
+                    "geometry": row["geometry"],
+                    "lanes_ab": direction_dictionary["lanes_ab"],
+                    "lanes_ba": direction_dictionary["lanes_ba"],
+                }
+                rows.append(new_row)
+            processed_df = gpd.GeoDataFrame(rows)
+        else:
+            raise ValueError("Invalid number of connectors provided. A segment needs at least 2 connectors to be considered a link.")
+        return processed_df
+
+    def get_speed(self, speed_row) -> float:
+        """
+        Returns the speed of a road. If a road has multiple speeds listed, the returned speed is the sum of the
+        listed speeds weighted by the proportion of the road each of them covers.
+        """
+        if speed_row is None:
+            adjusted_speed = speed_row
+        else:
+            speed = json.loads(speed_row)
+            if type(speed) == dict:
+                adjusted_speed = speed["maxSpeed"][0]
+            elif type(speed) == list and len(speed) >= 1:
+                # Extract the 'at' list from each dictionary
+                # eg [[0.0, 0.064320774], [0.064320774, 1.0]]
+                at_values_list = [entry["at"] for entry in speed]
+
+                # Calculate differences between consecutive numbers in each 'at' list. This iterates through each 'at'
+                # list in at_values_list and calculates the difference between consecutive elements using (at[i + 1] - at[i]).
+                # The result is a flat list of differences for all 'at' lists.
+ # eg [0.064320774, 0.935679226] + differences = [ + diff for at in at_values_list for diff in (at[i + 1] - at[i] for i in range(len(at) - 1)) + ] + + new_list = [] + for element in differences: + # Find the index of the value in the differences list + index_d = differences.index(element) + + # Access the corresponding entry in the original 'data' list to access the 'maxSpeed' value + speed_segment = speed[index_d]["maxSpeed"][0] * element + new_list.append(speed_segment) + + adjusted_speed = round(sum(new_list), 2) + return adjusted_speed + + def __update_table_structure(self): + curr = self.conn.cursor() + curr.execute("pragma table_info(Links)") + structure = curr.fetchall() + has_fields = [x[1].lower() for x in structure] + fields = [field.lower() for field in self.get_link_fields()] + ["ovm_id"] + for field in [f for f in fields if f not in has_fields]: + ltype = self.get_link_field_type(field).upper() + curr.execute(f"Alter table Links add column {field} {ltype}") + self.conn.commit() + + @staticmethod + def get_link_fields(): + p = Parameters() + fields = p.parameters["network"]["links"]["fields"] + owf = [list(x.keys())[0] for x in fields["one-way"]] + + twf1 = ["{}_ab".format(list(x.keys())[0]) for x in fields["two-way"]] + twf2 = ["{}_ba".format(list(x.keys())[0]) for x in fields["two-way"]] + + return owf + twf1 + twf2 + ["ovm_id"] + + @staticmethod + def get_link_field_type(field_name: list): + p = Parameters() + fields = p.parameters["network"]["links"]["fields"] + + if field_name[-3:].lower() in ["_ab", "_ba"]: + field_name = field_name[:-3] + for tp in fields["two-way"]: + if field_name in tp: + return tp[field_name]["type"] + else: + for tp in fields["one-way"]: + if field_name in tp: + return tp[field_name]["type"] + + @staticmethod + def get_direction(directions_list: list): + new_list = [] + at_dictionary = {} + + # Dictionary mapping direction strings to numeric values or descriptions + direction_dict = { + "forward": 1, + "backward": -1, + "bothWays": 0, + "alternating": "Travel is one-way and changes between forward and backward constantly", + "reversible": "Travel is one-way and changes between forward and backward infrequently", + } + + # Lambda function to check numbers and create a new dictionary + check_numbers = lambda lst: { + "direction": 1 if all(x == 1 for x in lst) else -1 if all(x == -1 for x in lst) else 0, + "lanes_ab": lst.count(1) if 1 in lst else None, + "lanes_ba": lst.count(-1) if -1 in lst else None, + } + + if directions_list is None: + new_list = [-1, 1] + elif directions_list != None: + for direct in directions_list: + if type(direct) == dict: + # Extract direction from the dictionary and append to new_list + direction = direction_dict[direct["direction"]] + new_list.append(direction) + elif type(direct) == list: + a_list = [] + at_dictionary[str(direct[0]["at"])] = direct[0]["at"][1] - direct[0]["at"][0] + max_key = max(at_dictionary, key=at_dictionary.get) + a_list.append(max_key) + + # Check if the current list is the one with maximum 'at' range + if str(direct[0]["at"]) == a_list[-1]: + new_list.clear() + for lists in direct[0]["value"]: + direction = direction_dict[lists["direction"]] + new_list.append(direction) + + return check_numbers(lst=new_list) diff --git a/aequilibrae/project/network/ovm_downloader.py b/aequilibrae/project/network/ovm_downloader.py new file mode 100644 index 000000000..3b9400eb0 --- /dev/null +++ b/aequilibrae/project/network/ovm_downloader.py @@ -0,0 +1,234 @@ +import json +import importlib.util as iutil +import 
sqlite3 +import logging +from pathlib import Path +import string + +from aequilibrae.context import get_active_project +from aequilibrae.parameters import Parameters +from aequilibrae.project.network.link_types import LinkTypes +from aequilibrae.context import get_logger +import importlib.util as iutil +from aequilibrae.utils.spatialite_utils import connect_spatialite +from aequilibrae.project.network.haversine import haversine +from aequilibrae.utils import WorkerThread + +# from .haversine import haversine +# from ...utils import WorkerThread + +import duckdb +import shapely +import geopandas as gpd +import pandas as pd +import numpy as np +from typing import Union +from shapely.geometry import LineString, Point + +DEFAULT_OVM_S3_LOCATION = "s3://overturemaps-us-west-2/release/2023-11-14-alpha.0//theme=transportation" + +spec = iutil.find_spec("PyQt5") +pyqt = spec is not None +if pyqt: + from PyQt5.QtCore import pyqtSignal + +spec = iutil.find_spec("qgis") +isqgis = spec is not None +if isqgis: + import qgis + +class OVMDownloader(WorkerThread): + if pyqt: + downloading = pyqtSignal(object) + + def __emit_all(self, *args): + if pyqt: + self.downloading.emit(*args) + + def __init__(self, modes: list, project_path: Union[str, Path], logger: logging.Logger = None) -> None: + WorkerThread.__init__(self, None) + self.logger = logger or get_logger() + self.filter = self.get_ovm_filter(modes) + self.GeoDataFrame = [] + self.__project_path = Path(project_path) + self.pth = str(self.__project_path).replace("\\", "/") + self.insert_qry = """INSERT INTO {} ({}, geometry) VALUES({}, GeomFromText(?, 4326))""" + + def initialise_duckdb_spatial(self): + conn = duckdb.connect() + c = conn.cursor() + + c.execute( + """INSTALL spatial; + INSTALL httpfs; + INSTALL parquet; + """ + ) + c.execute( + """LOAD spatial; + LOAD parquet; + SET s3_region='us-west-2'; + """ + ) + return c + + def downloadPlace(self, source, local_file_path=None): + pth = str(self.__project_path / "new_geopackage_pla.parquet").replace("\\", "/") + + if source == "s3": + data_source = "s3://overturemaps-us-west-2/release/2023-11-14-alpha.0/theme=places/type=*" + elif source == "local": + data_source = local_file_path.replace("\\", "/") + else: + raise ValueError("Invalid source. 
Use 's3' or provide a valid local file path.") + + sql = f""" + COPY( + SELECT + id, + CAST(names AS JSON) AS name, + CAST(categories AS JSON) AS categories, + CAST(brand AS JSON) AS brand, + CAST(addresses AS JSON) AS addresses, + ST_GeomFromWKB(geometry) AS geom + FROM read_parquet('{data_source}/*', filename=true, hive_partitioning=1) + WHERE bbox.minx > '{self.bbox[0]}' + AND bbox.maxx < '{self.bbox[2]}' + AND bbox.miny > '{self.bbox[1]}' + AND bbox.maxy < '{self.bbox[3]}') + TO '{pth}'; + """ + + c = self.initialise_duckdb_spatial() + c.execute(sql) + + + def downloadTransportation(self, bbox: list, data_source: Union[str, Path], output_dir: Union[str, Path]): + data_source = Path(data_source) or DEFAULT_OVM_S3_LOCATION + output_dir = Path(output_dir) / "theme=transportation" + + output_file_link = output_dir / f'type=segment' / f'transportation_data_segment.parquet' + output_file_node = output_dir / f'type=connector' / f'transportation_data_connector.parquet' + # output_file = output_dir / f'type={t}' / f'transportation_data_{t}.parquet' + output_file_link.parent.mkdir(parents=True, exist_ok=True) + output_file_node.parent.mkdir(parents=True, exist_ok=True) + + # Uncomment to see what information is stored the parquet file + # sql = f""" + # DESCRIBE + # SELECT + # road + # FROM read_parquet('{data_source}/type=segment/*', union_by_name=True) + # """ + # c = self.initialise_duckdb_spatial() + # g = c.execute(sql) + # print(g.df()) + + sql_link = f""" + COPY ( + SELECT + id AS ovm_id, + connectors, + CAST(road AS JSON) ->>'lanes' AS direction, + CAST(road AS JSON) ->>'class' AS link_type, + CAST(road AS JSON) ->>'roadNames' ->>'common' AS name, + CAST(road AS JSON) ->>'restrictions' ->> 'speedLimits' AS speed, + geometry + FROM read_parquet('{data_source}/type=segment/*', union_by_name=True) + WHERE bbox.minx > '{bbox[0]}' + AND bbox.maxx < '{bbox[2]}' + AND bbox.miny > '{bbox[1]}' + AND bbox.maxy < '{bbox[3]}') + TO '{output_file_link}' + (FORMAT 'parquet', COMPRESSION 'ZSTD'); + """ + c = self.initialise_duckdb_spatial() + c.execute(sql_link) + + sql_node = f""" + COPY ( + SELECT + id AS ovm_id, + geometry + FROM read_parquet('{data_source}/type=connector/*', union_by_name=True) + WHERE bbox.minx > '{bbox[0]}' + AND bbox.maxx < '{bbox[2]}' + AND bbox.miny > '{bbox[1]}' + AND bbox.maxy < '{bbox[3]}') + TO '{output_file_node}' + (FORMAT 'parquet', COMPRESSION 'ZSTD'); + """ + c.execute(sql_node) + + # Creating links GeoDataFrame + df_link = pd.read_parquet(output_file_link) + geo_link = gpd.GeoSeries.from_wkb(df_link.geometry, crs=4326) + gdf_link = gpd.GeoDataFrame(df_link,geometry=geo_link) + + # Creating nodes GeoDataFrame + df_node = pd.read_parquet(output_file_node) + geo_node = gpd.GeoSeries.from_wkb(df_node.geometry, crs=4326) + gdf_node = gpd.GeoDataFrame(df_node,geometry=geo_node) + + return gdf_link, gdf_node + + def get_ovm_filter(self, modes: list) -> str: + """ + loosely adapted from http://www.github.com/gboeing/osmnx + """ + + p = Parameters().parameters["network"]["ovm"] + all_tags = p["all_link_types"] + + p = p["modes"] + all_modes = list(p.keys()) + + tags_to_keep = [] + for m in modes: + if m not in all_modes: + raise ValueError(f"Mode {m} not listed in the parameters file") + tags_to_keep += p[m]["link_types"] + tags_to_keep = list(set(tags_to_keep)) + + # Default to remove + service = '["service"!~"parking|parking_aisle|driveway|private|emergency_access"]' + access = '["access"!~"private"]' + + filtered = [x for x in all_tags if x not in tags_to_keep] + filtered 
= "|".join(filtered) + + filter = f'["area"!~"yes"]["highway"!~"{filtered}"]{service}{access}' + + return filter + + def _download_test_data(self, data_source: Union[str, Path]): + '''This method only used to seed/bootstrap a local copy of a small test data set which should be commited to version control''' + airlie_bbox = [148.7077, -20.2780, 148.7324, -20.2621 ] + # brisbane_bbox = [153.1771, -27.6851, 153.2018, -27.6703] + data_source = data_source.replace("\\", "/") + + + for t in ['segment','connector']: + (Path(__file__).parent.parent.parent.parent / "tests" / "data" / "overture" / "theme=transportation" / f'type={t}').mkdir(parents=True, exist_ok=True) + pth1 = Path(__file__).parent.parent.parent.parent / "tests" / "data" / "overture" / "theme=transportation" / f"type={t}" / f'airlie_beach_transportation_{t}.parquet' + sql = f""" + COPY ( + SELECT + * + FROM read_parquet('{data_source}/type={t}/*', union_by_name=True) + WHERE bbox.minx > '{airlie_bbox[0]}' + AND bbox.maxx < '{airlie_bbox[2]}' + AND bbox.miny > '{airlie_bbox[1]}' + AND bbox.maxy < '{airlie_bbox[3]}') + TO '{pth1}' + (FORMAT 'parquet', COMPRESSION 'ZSTD'); + """ + c = self.initialise_duckdb_spatial() + c.execute(sql) + + df = pd.read_parquet(Path(pth1)) + geo = gpd.GeoSeries.from_wkb(df.geometry, crs=4326) + gdf = gpd.GeoDataFrame(df,geometry=geo) + gdf.to_parquet(Path(pth1)) + # return gdf + diff --git a/aequilibrae/project/project.py b/aequilibrae/project/project.py index bb464c2e9..168bfa880 100644 --- a/aequilibrae/project/project.py +++ b/aequilibrae/project/project.py @@ -130,6 +130,11 @@ def close(self) -> None: global_logger.info(f"Closed project on {self.project_base_path}") + for h in self.logger.handlers: + self.logger.removeHandler(h) + h.close() + + except (sqlite3.ProgrammingError, AttributeError): global_logger.warning(f"This project at {self.project_base_path} is already closed") diff --git a/docs/source/examples/creating_models/from_ovm.py b/docs/source/examples/creating_models/from_ovm.py new file mode 100644 index 000000000..d83e4d986 --- /dev/null +++ b/docs/source/examples/creating_models/from_ovm.py @@ -0,0 +1,91 @@ +# --- +# jupyter: +# jupytext: +# cell_metadata_filter: -all +# custom_cell_magics: kql +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.11.2 +# kernelspec: +# display_name: venv +# language: python +# name: python3 +# --- + +# %% +""" +Project from Overture Maps +============================= +In this example, we show how to create an empty project and populate it with a network from Overture Maps. +We will use Folium to visualize the network. 
+"""
+
+# %%
+# Imports
+from pathlib import Path
+from uuid import uuid4
+from tempfile import gettempdir
+from os.path import join
+from aequilibrae import Project
+import folium
+
+# %%
+# We create an empty project on an arbitrary folder
+from shutil import rmtree
+
+fldr = join(gettempdir(), uuid4().hex)
+project = Project()
+project.new(fldr)
+# %%
+# Now we can download the network from any place in the world (as long as you have memory for all the download
+# and data wrangling that will be done)
+# Airlie Beach's transportation parquet files are stored locally under the data_source path below, because
+# querying the cloud-native (S3) Parquet files directly takes much longer to run.
+# We recommend downloading the cloud-native Parquet files to a local drive and pointing data_source at that folder.
+dir = str(Path('../../../../').resolve())
+data_source = Path(dir) / 'tests' / 'data' / 'overture' / 'theme=transportation'
+output_dir = Path(fldr) / "theme=transportation"
+
+# For the sake of this example, we will choose the small town of Airlie Beach.
+# The "bbox" parameter specifies the bounding box of the desired geographical location; here it encompasses Airlie Beach.
+bbox = [148.7077, -20.2780, 148.7324, -20.2621]
+
+# We can create from a bounding box or a named place.
+# The filtered parquet files produced during the import are written under the folder passed as output_dir.
+project.network.create_from_ovm(west=bbox[0], south=bbox[1], east=bbox[2], north=bbox[3], data_source=data_source, output_dir=data_source)
+
+# %%
+# We grab all the links data as a Pandas DataFrame so we can process it more easily
+links = project.network.links.data
+
+# %%
+# We create a Folium layer
+network_links = folium.FeatureGroup("links")
+
+# We do some Python magic to transform this dataset into the format required by Folium
+# We are only getting link_id and link_type into the map, but we could get other pieces of info as well
+for i, row in links.iterrows():
+    points = row.geometry.wkt.replace("LINESTRING ", "").replace("(", "").replace(")", "").split(", ")
+    points = "[[" + "],[".join([p.replace(" ", ", ") for p in points]) + "]]"
+    # we need to go from x/y to lat/long
+    points = [[x[1], x[0]] for x in eval(points)]
+
+    line = folium.vector_layers.PolyLine(
+        points, popup=f"link_id: {row.link_id}", tooltip=f"{row.link_type}", color="blue", weight=10
+    ).add_to(network_links)
+
+# %%
+# We get the center of the region
+long = (bbox[0] + bbox[2]) / 2
+lat = (bbox[1] + bbox[3]) / 2
+
+# %%
+map_osm = folium.Map(location=[lat, long], zoom_start=14)
+network_links.add_to(map_osm)
+folium.LayerControl().add_to(map_osm)
+map_osm
+
+# %%
+project.close()
+
+# %%
diff --git a/docs/source/examples/creating_models/ovm.ipynb b/docs/source/examples/creating_models/ovm.ipynb new file mode 100644 index 000000000..666af0ff9 --- /dev/null +++ b/docs/source/examples/creating_models/ovm.ipynb @@ -0,0 +1,10010 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "from uuid import uuid4\n", + "from tempfile import gettempdir\n", + "from os.path import join\n", + "from aequilibrae import Project\n", + "import folium\n", + "\n", + "from aequilibrae.project.network.ovm_downloader import OVMDownloader\n", + "# sphinx_gallery_thumbnail_path = 'images/nauru.png'" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# We create an empty project on an arbitrary folder\n", + "fldr = join(gettempdir(), uuid4().hex)\n", + "project = 
Project()\n", + "project.new(fldr)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Now we can download the network from any place in the world (as long as you have memory for all the download\n", + "# and data wrangling that will be done)\n", + "\n", + "# We can create from a bounding box or a named place.\n", + "# For the sake of this example, we will choose a section of highway in Brisebane.\n", + "brisbane_bbox = [153.1771, -27.6851, 153.2018, -27.6703]\n", + "project.network.create_from_ovm(west=brisbane_bbox[0], south=brisbane_bbox[1], east=brisbane_bbox[2], north=brisbane_bbox[3], data_source=r'C:\\Users\\penny\\git\\data\\theme=transportation', output_dir=r'C:\\Users\\penny\\git\\Aequilibrae\\tests\\data\\overture\\theme=transportation')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# We grab all the links data as a Pandas DataFrame so we can process it easier\n", + "links = project.network.links.data\n", + "\n", + "# We create a Folium layer\n", + "network_links = folium.FeatureGroup(\"links\")\n", + "\n", + "# We do some Python magic to transform this dataset into the format required by Folium\n", + "# We are only getting link_id and link_type into the map, but we could get other pieces of info as well\n", + "for i, row in links.iterrows():\n", + " points = row.geometry.wkt.replace(\"LINESTRING \", \"\").replace(\"(\", \"\").replace(\")\", \"\").split(\", \")\n", + " points = \"[[\" + \"],[\".join([p.replace(\" \", \", \") for p in points]) + \"]]\"\n", + " # we need to take from x/y to lat/long\n", + " points = [[x[1], x[0]] for x in eval(points)]\n", + "\n", + " line = folium.vector_layers.PolyLine(\n", + " points, popup=f\"link_id: {row.link_id}\", tooltip=f\"{row.link_type}\", color=\"blue\", weight=10\n", + " ).add_to(network_links)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# We get the center of the region we are working with some SQL magic\n", + "# long = (bbox[0]+bbox[2])/2\n", + "# lat = (bbox[1]+bbox[3])/2\n", + "long = (brisbane_bbox[0]+brisbane_bbox[2])/2\n", + "lat = (brisbane_bbox[1]+brisbane_bbox[3])/2" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "map_osm = folium.Map(location=[lat, long], zoom_start=14)\n", + "network_links.add_to(map_osm)\n", + "folium.LayerControl().add_to(map_osm)\n", + "map_osm" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "project.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index 6a1ba0519..ade9e5a0c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,6 @@ shapely pandas pyproj rtree -openmatrix \ No newline at end of file +openmatrix +duckdb +geopandas \ No newline at end of file diff --git a/tests/aequilibrae/project/ovm/setup_test_data.ipynb b/tests/aequilibrae/project/ovm/setup_test_data.ipynb new file mode 100644 index 000000000..2a56d5a73 --- /dev/null +++ b/tests/aequilibrae/project/ovm/setup_test_data.ipynb @@ -0,0 +1,65 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Setup\n", + "\n", + "This notebook is used to download and explore the OVM data sets for use in our automated testing environment." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "from pathlib import Path\n", + "aeq_dir = str(Path('../../../../').resolve())\n", + "if aeq_dir not in sys.path:\n", + " sys.path.append(aeq_dir)\n", + "\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import shapely\n", + "from aequilibrae.project.network.ovm_downloader import OVMDownloader\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test_data_dir = Path(aeq_dir) / \"tests\" / \"data\" / \"overture\" / 'theme=transportation' \n", + "\n", + "\n", + "# ovm_downloader_instance = OVMDownloader([\"car\"], test_data_dir)\n", + "# ovm_downloader_instance._download_test_data('E:/theme=transportation/type=segment')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/aequilibrae/project/ovm/test_ovm_downloader.py b/tests/aequilibrae/project/ovm/test_ovm_downloader.py new file mode 100644 index 000000000..b67cc3705 --- /dev/null +++ b/tests/aequilibrae/project/ovm/test_ovm_downloader.py @@ -0,0 +1,32 @@ +import tempfile +from pathlib import Path +from aequilibrae.project.network.ovm_downloader import OVMDownloader + +data_dir = Path(__file__).parent.parent.parent.parent / "data" / "overture" / "theme=transportation" + + +def test_download(): + with tempfile.TemporaryDirectory() as output_dir: + o = OVMDownloader(["car"], output_dir) + + box1 = [148.713909, -20.272261, 148.7206475, -20.2702697] + gdf_link, gdf_node = 
o.downloadTransportation(bbox=box1, data_source=data_dir, output_dir=output_dir) + + for t in ["segment", "connector"]: + output_dir = Path(output_dir) + # bbo = [148.71641, -20.27082, 148.71861, -20.27001] + # woolworths_parkinglot = [148.718, -20.27049, 148.71889, -20.27006] + expected_file = output_dir / f"theme=transportation" / f"type={t}" / f"transportation_data_{t}.parquet" + assert expected_file.exists() + + link_columns = ["ovm_id", "connectors", "direction", "link_type", "name", "speed", "road", "geometry"] + for element in link_columns: + assert element in gdf_link.columns + + node_columns = ["ovm_id", "geometry"] + for element in node_columns: + assert element in gdf_node.columns + + # assert 'is_centroid' in gdf_node.columns + # assert ['unknown', 'secondary', 'residential', 'parkingAisle'] == list(list_gdf[0]['link_type'].unique()) + diff --git a/tests/aequilibrae/project/ovm/test_ovm_processor.py b/tests/aequilibrae/project/ovm/test_ovm_processor.py new file mode 100644 index 000000000..0a1e906f8 --- /dev/null +++ b/tests/aequilibrae/project/ovm/test_ovm_processor.py @@ -0,0 +1,246 @@ +import copy +import tempfile +from pathlib import Path +import geopandas as gpd +import shapely +from aequilibrae import global_logger + +from aequilibrae import Project +from aequilibrae.project.network.ovm_builder import OVMBuilder + + +def test_link_geo_trimmer(): + node1 = (148.7165148, -20.273062) + node2 = (148.7164104, -20.2730078) + geo = shapely.LineString([(148.7165748, -20.2730668), node1, (148.7164585, -20.2730418), node2]) + link_gdf = gpd.GeoDataFrame([[1, 2, geo]], columns=["a_node", "b_node", "geometry"]) + new_geom = copy.copy(link_gdf) + + node_lu = { + 1: {"lat": node1[1], "long": node1[0], "coord": node1}, + 2: {"lat": node2[1], "long": node2[0], "coord": node2}, + } + + with tempfile.TemporaryDirectory() as output_dir: + project_dir = str(Path(output_dir) / "project") + project = Project() + project.new(project_dir) + o = OVMBuilder(link_gdf, gpd.GeoDataFrame(), project_path=project_dir, project=project) + + # Iterate over the correct range + new_geom["geometry"] = [o.trim_geometry(node_lu, row) for e, row in link_gdf.iterrows()] + + # Assuming you want to assert the length of the new geometry + assert len(new_geom["geometry"][0].coords) == 3 + + # Assuming you want to assert the correctness of the new geometry + # If you don't need the difference operation, you can skip it + + for i in range(0, len(link_gdf)): + if i > 0: + assert new_geom["geometry"][i] == shapely.LineString([node1, (148.7164585, -20.2730418), node2]) + + project.close() + +def test_link_lanes(): + """ + segment and node infomation is currently [1] element of links when running from_ovm.py + """ + + no_info = None + simple = [{"direction": "backward"}, {"direction": "forward"}] + + lanes_3 = [ + { + "direction": "forward", + "restrictions": { + "access": [{"allowed": {"when": {"mode": ["hov"]}}}], + "minOccupancy": {"isAtLeast": 3}, + }, + }, + {"direction": "forward"}, + {"direction": "forward"}, + ] + + highway = [ + {"direction": "backward"}, + {"direction": "backward"}, + {"direction": "backward"}, + {"direction": "backward"}, + {"direction": "forward"}, + {"direction": "forward"}, + {"direction": "forward"}, + {"direction": "forward"}, + ] + + lane_ends = [ + [{ + "at": [0, 0.67], + "value": [{"direction": "backward"}, {"direction": "forward"}, {"direction": "forward"}], + } + ], + [{"at": [0.67, 1], "value": [{"direction": "backward"}, {"direction": "forward"}]}], + ] + + lane_begins = [ + [{"at": 
[0, 0.2], "value": [{"direction": "backward"}, {"direction": "forward"}]}], + [ + { + "at": [0.2, 1], + "value": [{"direction": "backward"}, {"direction": "forward"}, {"direction": "forward"}], + } + ], + ] + + lane_merge_twice = [ + [ + { + "at": [0, 0.2], + "value": [ + {"direction": "backward"}, + {"direction": "backward"}, + {"direction": "forward"}, + {"direction": "forward"}, + ], + } + ], + [ + { + "at": [0.2, 0.8], + "value": [{"direction": "backward"}, {"direction": "forward"}, {"direction": "forward"}], + } + ], + [{"at": [0.8, 1], "value": [{"direction": "backward"}, {"direction": "forward"}]}], + ] + + equal_dis = [ + [ + { + "at": [0, 0.5], + "value": [{"direction": "backward"}, {"direction": "forward"}, {"direction": "forward"}], + } + ], + [{"at": [0.5, 1], "value": [{"direction": "backward"}, {"direction": "forward"}]}], + ] + + # def road(lane): + # road_info = str( + # { + # "class": "secondary", + # "surface": "paved", + # "restrictions": {"speedLimits": {"maxSpeed": [70, "km/h"]}}, + # "roadNames": {"common": [{"language": "local", "value": "Shute Harbour Road"}]}, + # "lanes": lane, + # } + # ) + # return road_info + + a_node = {"ovm_id": "8f9d0e128cd9709-167FF64A37F1BFFB", "geometry": shapely.Point(148.72460, -20.27472)} + b_node = {"ovm_id": "8f9d0e128cd98d6-15FFF68E65613FDF", "geometry": shapely.Point(148.72471, -20.27492)} + node_df = gpd.GeoDataFrame(data=[a_node, b_node]) + + def segment(direction): + segment = { + "ovm_id": "8b9d0e128cd9fff-163FF6797FC40661", + "connectors": [["8f9d0e128cd9709-167FF64A37F1BFFB", "8f9d0e128cd98d6-15FFF68E65613FDF"]], + "direction": direction, + "link_type": "secondary", + "name": '[{"value": "Shute Harbour Road"}]', + "speed": '{"maxSpeed":[70,"km/h"]}', + "geometry": shapely.LineString( + [ + (148.7245987, -20.2747175), + (148.7246504, -20.2747531), + (148.724688, -20.274802), + (148.7247077, -20.2748593), + (148.7247078, -20.2749195), + ] + ), + } + return segment + + # def link_gdf(lane_info): + # return gpd.GeoDataFrame(segment(lane_info, road(lane_info))) + + def set_up_ovmbuilder(lane_info, output_dir, project): + print(lane_info) + print() + print(segment(lane_info)) + links = gpd.GeoDataFrame(segment(lane_info)) + print(links) + o = OVMBuilder(links, node_df, project_path=output_dir / "project", project=project) + o.create_node_ids(node_df) + o._worksetup() + link_gdf = o.formatting(links, node_df, output_dir) + print(link_gdf) + return link_gdf + + + with tempfile.TemporaryDirectory() as output_dir: + output_dir = Path(output_dir) + project = Project() + project.new(output_dir / "project") + + gdf_no_info = set_up_ovmbuilder(no_info, output_dir, project) + + assert gdf_no_info["direction"][0] == 0 + assert gdf_no_info["lanes_ab"][0] == 1 + assert gdf_no_info["lanes_ba"][0] == 1 + + gdf_simple = set_up_ovmbuilder(simple, output_dir, project) + + assert len(simple) == 2 + assert gdf_simple["direction"][0] == 0 + assert gdf_simple["lanes_ab"][0] == 1 + assert gdf_simple["lanes_ab"][0] == 1 + + gdf_lanes_3 = set_up_ovmbuilder(lanes_3, output_dir, project) + + assert len(lanes_3) == 3 + assert gdf_lanes_3["direction"][0] == 1 + assert gdf_lanes_3["lanes_ab"][0] == 3 + assert gdf_lanes_3["lanes_ba"][0] == None + + gdf_highway = set_up_ovmbuilder(highway, output_dir, project) + + assert len(highway) == 8 + assert gdf_highway["direction"][0] == 0 + assert gdf_highway["lanes_ab"][0] == 4 + assert gdf_highway["lanes_ba"][0] == 4 + + gdf_lane_ends = set_up_ovmbuilder(lane_ends, output_dir, project) + + assert len(lane_ends) == 2 + 
assert len(lane_ends[0][0]["value"]) == 3 + assert len(lane_ends[1][0]["value"]) == 2 + assert gdf_lane_ends["direction"][0] == 0 + assert gdf_lane_ends["lanes_ab"][0] == 2 + assert gdf_lane_ends["lanes_ba"][0] == 1 + + gdf_lane_begins = set_up_ovmbuilder(lane_begins, output_dir, project) + + assert len(lane_begins) == 2 + assert len(lane_begins[0][0]["value"]) == 2 + assert len(lane_begins[1][0]["value"]) == 3 + assert gdf_lane_begins["direction"][0] == 0 + assert gdf_lane_begins["lanes_ab"][0] == 2 + assert gdf_lane_begins["lanes_ba"][0] == 1 + + gdf_lane_merge_twice = set_up_ovmbuilder(lane_merge_twice, output_dir, project) + + assert len(lane_merge_twice) == 3 + assert len(lane_merge_twice[0][0]["value"]) == 4 + assert len(lane_merge_twice[1][0]["value"]) == 3 + assert len(lane_merge_twice[2][0]["value"]) == 2 + assert gdf_lane_merge_twice["direction"][0] == 0 + assert gdf_lane_merge_twice["lanes_ab"][0] == 2 + assert gdf_lane_merge_twice["lanes_ba"][0] == 1 + + gdf_equal_dis = set_up_ovmbuilder(equal_dis, output_dir, project) + + assert len(equal_dis) == 2 + assert len(equal_dis[0][0]["value"]) == 3 + assert len(equal_dis[1][0]["value"]) == 2 + assert gdf_equal_dis["direction"][0] == 0 + assert gdf_equal_dis["lanes_ab"][0] == 2 + assert gdf_equal_dis["lanes_ba"][0] == 1 diff --git a/tests/data/overture/theme=transportation/type=connector/airlie_beach_transportation_connector.parquet b/tests/data/overture/theme=transportation/type=connector/airlie_beach_transportation_connector.parquet new file mode 100644 index 000000000..dd81d19d4 Binary files /dev/null and b/tests/data/overture/theme=transportation/type=connector/airlie_beach_transportation_connector.parquet differ diff --git a/tests/data/overture/theme=transportation/type=segment/airlie_beach_transportation_segment.parquet b/tests/data/overture/theme=transportation/type=segment/airlie_beach_transportation_segment.parquet new file mode 100644 index 000000000..878a747ea Binary files /dev/null and b/tests/data/overture/theme=transportation/type=segment/airlie_beach_transportation_segment.parquet differ
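For readers tracing the lane handling exercised by these tests, the sketch below mirrors the aggregation performed inside OVMBuilder.get_direction. It is illustrative only: the summarise_lanes name and the standalone direction_dict constant are made up here, and only plain lane lists are handled, not the positional "at" ranges that the real method also supports.

# Illustrative sketch of the lane aggregation in OVMBuilder.get_direction (simplified).
direction_dict = {"forward": 1, "backward": -1, "bothWays": 0}


def summarise_lanes(lanes):
    # With no lane information, the builder assumes one lane in each direction.
    values = [-1, 1] if lanes is None else [direction_dict[lane["direction"]] for lane in lanes]
    return {
        "direction": 1 if all(v == 1 for v in values) else -1 if all(v == -1 for v in values) else 0,
        "lanes_ab": values.count(1) if 1 in values else None,
        "lanes_ba": values.count(-1) if -1 in values else None,
    }


# Two-lane two-way street: bidirectional link with one lane each way.
print(summarise_lanes([{"direction": "backward"}, {"direction": "forward"}]))
# {'direction': 0, 'lanes_ab': 1, 'lanes_ba': 1}

# Three forward lanes: one-way link with lanes_ab = 3.
print(summarise_lanes([{"direction": "forward"}] * 3))
# {'direction': 1, 'lanes_ab': 3, 'lanes_ba': None}

These outputs match the assertions in test_link_lanes above: a backward/forward pair yields direction 0 with one lane each way, while an all-forward lane list yields a one-way link.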