From 0b55ea99146259e53af675bf226aba7e00ff83d7 Mon Sep 17 00:00:00 2001 From: Rob Savoye Date: Sun, 6 Oct 2024 16:02:59 -0600 Subject: [PATCH] Deployed e92e89c with MkDocs version: 1.5.3 --- 404.html | 20 + CHANGELOG/index.html | 20 + LICENSE/index.html | 20 + about/index.html | 20 + api/index.html | 108 ++-- calculations/index.html | 20 + conflation/index.html | 20 + formats/index.html | 20 + highways/index.html | 20 + index.html | 20 + mvum/index.html | 20 + odkconflation/index.html | 20 + osm-merge/index.html | 20 + osm.md~ | 12 + osmhighways.md~ | 37 ++ osmhighways/index.html | 1060 ++++++++++++++++++++++++++++++++++++++ search/search_index.json | 2 +- sitemap.xml | 39 +- sitemap.xml.gz | Bin 325 -> 328 bytes trails/index.html | 22 +- usgs/index.html | 20 + utilities/index.html | 42 +- wiki_redirect/index.html | 20 + zion/index.html | 20 + 24 files changed, 1558 insertions(+), 64 deletions(-) create mode 100644 osm.md~ create mode 100644 osmhighways.md~ create mode 100644 osmhighways/index.html diff --git a/404.html b/404.html index e889162..9f09e9c 100644 --- a/404.html +++ b/404.html @@ -675,6 +675,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/CHANGELOG/index.html b/CHANGELOG/index.html index 9176aca..922d145 100644 --- a/CHANGELOG/index.html +++ b/CHANGELOG/index.html @@ -698,6 +698,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/LICENSE/index.html b/LICENSE/index.html index 82d06b6..dcf3c68 100644 --- a/LICENSE/index.html +++ b/LICENSE/index.html @@ -883,6 +883,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/about/index.html b/about/index.html index a80965e..812b298 100644 --- a/about/index.html +++ b/about/index.html @@ -698,6 +698,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/api/index.html b/api/index.html index 6f6c687..67d84cf 100644 --- a/api/index.html +++ b/api/index.html @@ -13,7 +13,7 @@ - + @@ -684,6 +684,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + @@ -1429,7 +1449,8 @@

    conflator.py Source code in osm_merge/conflator.py -
    286
    +                  
    285
    +286
     287
     288
     289
    @@ -1454,8 +1475,7 @@ 

    conflator.py308 309 310 -311 -312

    def __init__(self,
    +311
    def __init__(self,
                  uri: str = None,
                  boundary: str = None
                  ):
    @@ -1585,7 +1605,8 @@ 

    Source code in osm_merge/conflator.py -
    391
    +            
    390
    +391
     392
     393
     394
    @@ -1657,8 +1678,7 @@ 

    460 461 462 -463 -464

    def getDistance(self,
    +463
    def getDistance(self,
             newdata: Feature,
             olddata: Feature,
             ) -> float:
    @@ -1836,7 +1856,8 @@ 

    Source code in osm_merge/conflator.py -
    466
    +            
    465
    +466
     467
     468
     469
    @@ -1931,8 +1952,7 @@ 

    558 559 560 -561 -562

    def checkTags(self,
    +561
    def checkTags(self,
                   extfeat: Feature,
                   osm: Feature,
                    ):
    @@ -2109,7 +2129,8 @@ 

    Source code in osm_merge/conflator.py -
    564
    +            
    563
    +564
     565
     566
     567
    @@ -2205,8 +2226,7 @@ 

    657 658 659 -660 -661

    def loadFile(
    +660
    def loadFile(
         self,
         osmfile: str,
     ) -> list:
    @@ -2381,7 +2401,8 @@ 

    Source code in osm_merge/conflator.py -
    663
    +            
    662
    +663
     664
     665
     666
    @@ -2398,8 +2419,7 @@ 

    677 678 679 -680 -681

    async def initInputDB(self,
    +680
    async def initInputDB(self,
                         config: str = None,
                         dburi: str = None,
                         ) -> bool:
    @@ -2493,7 +2513,8 @@ 

    Source code in osm_merge/conflator.py -
    683
    +            
    682
    +683
     684
     685
     686
    @@ -2507,8 +2528,7 @@ 

    694 695 696 -697 -698

    async def initOutputDB(self,
    +697
    async def initOutputDB(self,
                         dburi: str = None,
                         ):
         """
    @@ -2637,7 +2657,8 @@ 

    Source code in osm_merge/conflator.py -
    700
    +            
    699
    +700
     701
     702
     703
    @@ -2672,8 +2693,7 @@ 

    732 733 734 -735 -736

    async def createDBThreads(self,
    +735
    async def createDBThreads(self,
                         uri: str = None,
                         config: str = None,
                         execs: int = cores,
    @@ -2822,7 +2842,8 @@ 

    Source code in osm_merge/conflator.py -
    738
    +            
    737
    +738
     739
     740
     741
    @@ -2887,8 +2908,7 @@ 

    800 801 802 -803 -804

    async def conflateData(self,
    +803
    async def conflateData(self,
                     odkspec: str,
                     osmspec: str,
                     threshold: float = 3.0,
    @@ -2932,7 +2952,7 @@ 

    log.info(f"The secondary dataset has {len(osmdata)} entries") # Make threading optional for easier debugging - single = True # False + single = False if single: alldata = conflateThread(odkdata, osmdata) @@ -2981,12 +3001,12 @@

    Source code in osm_merge/conflator.py -
    806
    +            
    805
    +806
     807
     808
     809
    -810
    -811
    def dump(self):
    +810
    def dump(self):
         """
         Dump internal data for debugging.
         """
    @@ -3072,7 +3092,8 @@ 

    Source code in osm_merge/conflator.py -
    816
    +            
    815
    +816
     817
     818
     819
    @@ -3106,8 +3127,7 @@ 

    847 848 849 -850 -851

    def parseFile(self,
    +850
    def parseFile(self,
                 filespec: str,
                 ) ->list:
         """
    @@ -3223,7 +3243,8 @@ 

    Source code in osm_merge/conflator.py -
    853
    +            
    852
    +853
     854
     855
     856
    @@ -3290,8 +3311,7 @@ 

    917 918 919 -920 -921

    def conflateDB(self,
    +920
    def conflateDB(self,
                      source: str,
                      ) -> dict:
         """
    @@ -3430,7 +3450,8 @@ 

    Source code in osm_merge/conflator.py -
    924
    +            
    923
    +924
     925
     926
     927
    @@ -3497,8 +3518,7 @@ 

    988 989 990 -991 -992

    def writeOSM(self,
    +991
    def writeOSM(self,
                  data: list,
                  filespec: str,
                  ):
    @@ -3637,7 +3657,8 @@ 

    Source code in osm_merge/conflator.py -
     994
    +            
     993
    + 994
      995
      996
      997
    @@ -3649,8 +3670,7 @@ 

    1003 1004 1005 -1006 -1007

    def writeGeoJson(self,
    +1006
    def writeGeoJson(self,
                  data: dict,
                  filespec: str,
                  ):
    @@ -3745,7 +3765,8 @@ 

    Source code in osm_merge/conflator.py -
    1009
    +            
    1008
    +1009
     1010
     1011
     1012
    @@ -3775,8 +3796,7 @@ 

    1036 1037 1038 -1039 -1040

    def osmToFeature(self,
    +1039
    def osmToFeature(self,
                      osm: dict(),
                      ) -> Feature:
         """
    diff --git a/calculations/index.html b/calculations/index.html
    index 70ed3fe..4da7ed6 100644
    --- a/calculations/index.html
    +++ b/calculations/index.html
    @@ -798,6 +798,26 @@
     
                   
                 
    +              
    +                
    +  
    +  
    +  
    +    
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/conflation/index.html b/conflation/index.html index c8ce81e..3f25ee2 100644 --- a/conflation/index.html +++ b/conflation/index.html @@ -771,6 +771,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/formats/index.html b/formats/index.html index a8364ea..dd2a2bc 100644 --- a/formats/index.html +++ b/formats/index.html @@ -805,6 +805,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/highways/index.html b/highways/index.html index 910c944..3c9ceac 100644 --- a/highways/index.html +++ b/highways/index.html @@ -698,6 +698,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/index.html b/index.html index 0a44a7d..28faf2e 100644 --- a/index.html +++ b/index.html @@ -760,6 +760,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/mvum/index.html b/mvum/index.html index 2885ff8..6bbe19c 100644 --- a/mvum/index.html +++ b/mvum/index.html @@ -819,6 +819,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/odkconflation/index.html b/odkconflation/index.html index 80fca70..9e20b7e 100644 --- a/odkconflation/index.html +++ b/odkconflation/index.html @@ -777,6 +777,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/osm-merge/index.html b/osm-merge/index.html index 84adf43..28910c1 100644 --- a/osm-merge/index.html +++ b/osm-merge/index.html @@ -698,6 +698,26 @@ + + + + + +
  • + + + + + OSM Highways + + + + +
  • + + + + diff --git a/osm.md~ b/osm.md~ new file mode 100644 index 0000000..b28788e --- /dev/null +++ b/osm.md~ @@ -0,0 +1,12 @@ +# OpenStreetMap Data + +## Old Imports + +OpenStreetMap (OSM) has a past history of imports, often done way back +when OSM had little highway data. This was a way to bootstrap +navigation, and it mostly worked. + +### TIGER + + +### Motor Vehivle Use Map (MVUM) diff --git a/osmhighways.md~ b/osmhighways.md~ new file mode 100644 index 0000000..db045ce --- /dev/null +++ b/osmhighways.md~ @@ -0,0 +1,37 @@ +# OpenStreetMap Data + +## Old Imports + +OpenStreetMap (OSM) has a past history of imports, often done way back +when OSM had little highway data. This was a way to bootstrap +navigation, and it mostly worked. + +### TIGER + +Since it was publically available, the data [used by the US Census +Department](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-geodatabase-file.html) +was imported around 2007. The data is of varying quality, but was +better than nothing. The OSM community has been cleaning up the mess +ever since. More information on the TIGER fixup can be [found +here](https://wiki.openstreetmap.org/wiki/TIGER_fixup). + +An example of the tags added from TIGER: + + + + + + + + + +### Motor Vehivle Use Map (MVUM) + +The MVUM data is highways in national forests, so useful in remote +area not always in TIGER. Or in TIGER but completely wrong. I've seen +roads in TIGER that don't actually exist. All the MVUM data is better +quality as much of the data was mapped by ground-truthing. It has +useful data fields, like is a high clearance vehicle needed, what is +the surface, and other access data like are ATVs allowed ? + +[MVUM)](https://data.fs.usda.gov/geodata/edw/edw_resources/shp/S_USA.Road_MVUM.zip diff --git a/osmhighways/index.html b/osmhighways/index.html new file mode 100644 index 0000000..6495fb3 --- /dev/null +++ b/osmhighways/index.html @@ -0,0 +1,1060 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + OSM Highways - osm-merge + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    + + + + + + + +

    OpenStreetMap Data

    +

Being crowd-sourced and open to all who want to contribute, +OpenStreetMap (OSM) has nearly unlimited flexibility in the tags and values +used for metadata. Many of the tags not in common use are ignored by +the renderers and routing engines, but they still live in the database and +data files. You'd really only notice them if you're deep in the data, which +is exactly where good conflation starts.

    +

The features in OSM come from a wide variety of sources: mobile apps, +imports, and tracing from satellite imagery. Features traced from imagery often +lack any tags beyond building=yes or highway=track, which +we hope to improve on by conflating with other datasets.

    +

    Data Janitor

    +

Being a data janitor is an important, if rather boring and tedious, +task. Bugs in the data can lead to navigation problems at the very +least. An accurate and detailed map is a thing of beauty, and OSM +often gets really close.

    +

Unfortunately, to conflate OSM data with external data sources, it first +needs to be cleaned up. Normally it gets cleaned up by the mapper, who +has to manually review and edit the tags. Since the highway name is an +important item used to confirm a near match in geometry, too much +variety in the names can make this a slow process.

    +
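+
+To give a concrete sense of why name variety slows this down, the
+checkTags() routine shown in the API docs scores name similarity with
+fuzz.ratio() and a match threshold of 85. A rough sketch of that idea,
+assuming the rapidfuzz library and using made-up street names:
+
+    from rapidfuzz import fuzz   # assumed library; the project code only relies on fuzz.ratio()
+
+    MATCH_THRESHOLD = 85         # the same cutoff checkTags() uses
+
+    def names_match(name1: str, name2: str) -> bool:
+        # Capitalization, singular vs plural, and small typos usually
+        # still score above the threshold; unrelated names do not.
+        ratio = fuzz.ratio(name1.lower(), name2.lower())
+        return ratio > MATCH_THRESHOLD and abs(len(name1) - len(name2)) <= 3
+
+    print(names_match("Gross Dam Road", "Gross Dam road"))   # True, only case differs
+    print(names_match("Gross Dam Road", "County Road 77"))   # False
+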

This project has an +osmhighways.py +program that is used to clean up some of these problems, like deleting +unnecessary tags and fixing the name vs. reference number +problem. Deleting all the bogus tags reduces the data size, which is a +benefit. The program also extracts only the highway linestrings, producing a +clean dataset for conflating geometries.

    +
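+
+As a sketch of the name vs. reference fix (illustrative only, not the
+actual osmhighways.py logic; the regular expression and the FR prefix
+below are assumptions):
+
+    import re
+
+    # Names like "FS 123.1" or "FR 505" are really reference numbers.
+    REF_PATTERN = re.compile(r"^(FS|FR|USFS)\s*([0-9.]+[A-Z]?)$", re.IGNORECASE)
+
+    def fix_name_vs_ref(tags: dict) -> dict:
+        fixed = dict(tags)
+        match = REF_PATTERN.match(fixed.get("name", ""))
+        if match:
+            # Move the reference number into ref:usfs and drop the bogus name.
+            fixed["ref:usfs"] = f"FR {match.group(2).upper()}"
+            del fixed["name"]
+        return fixed
+
+    print(fix_name_vs_ref({"highway": "track", "name": "FS 123.1"}))
+    # {'highway': 'track', 'ref:usfs': 'FR 123.1'}
+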

    Old Imports

    +

OpenStreetMap (OSM) has a long history of imports, many done way back +when OSM had little highway data. This was a way to bootstrap +navigation, and it mostly worked.

    +

    TIGER

    +

Since it was publicly available, the data used by the US Census +Bureau +was imported around 2007. The data was of varying quality, but +better than nothing. The OSM community has been cleaning up the mess +ever since. More information on the TIGER fixup can be found +here.

    +

A small example of the tags added from TIGER, all of which can be +deleted.

    +
        <tag k="tiger:name_base" v="75th"/>
    +    <tag k="tiger:name_base_1" v="75th"/>
    +    <tag k="tiger:name_direction_prefix" v="N"/>
    +    <tag k="tiger:name_type" v="St"/>
    +    <tag k="tiger:name_type_1" v="St"/>
    +    <tag k="tiger:cfcc" v="A41"/>
    +    <tag k="tiger:reviewed" v="no"/>
    +
    +

    I don't think I've ever seen a tiger:reviewed=yes tag.

    +
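+
+Deleting them is trivial if the tags are treated as a plain dictionary.
+A minimal sketch (not the osmhighways.py code itself):
+
+    def strip_tiger(tags: dict) -> dict:
+        # Drop every key in the tiger: namespace; none of it is useful today.
+        return {k: v for k, v in tags.items() if not k.startswith("tiger:")}
+
+    way = {"highway": "residential", "name": "North 75th Street",
+           "tiger:cfcc": "A41", "tiger:reviewed": "no"}
+    print(strip_tiger(way))
+    # {'highway': 'residential', 'name': 'North 75th Street'}
+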

    Motor Vehicle Use Map (MVUM)

    +

The MVUM data covers highways in national forests, so it is useful in remote +areas that are not in TIGER at all, or are in TIGER but completely wrong. I've seen +roads in TIGER that don't actually exist. The MVUM data is generally better +quality, as much of it was mapped by ground-truthing. It has +useful data fields, like whether a high-clearance vehicle is needed, what +the surface is, and other access data such as whether ATVs are allowed.

    +
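+
+When converted for OSM, those fields map onto ordinary OSM tags such as
+surface, 4wd_only, and atv. A sketch of that mapping, using hypothetical
+MVUM attribute names since the real Forest Service field names differ:
+
+    def mvum_to_osm(record: dict) -> dict:
+        # "route_id", "route_name", "high_clearance", "surface" and "atv"
+        # are made-up field names standing in for the real MVUM attributes.
+        tags = {"highway": "track",
+                "ref:usfs": f"FR {record['route_id']}",
+                "name": record["route_name"].title()}
+        if record.get("high_clearance"):
+            tags["4wd_only"] = "yes"
+        if record.get("surface"):
+            tags["surface"] = record["surface"].lower()
+        if record.get("atv") is not None:
+            tags["atv"] = "yes" if record["atv"] else "no"
+        return tags
+
+    print(mvum_to_osm({"route_id": "123.1", "route_name": "GROSS DAM",
+                       "high_clearance": True, "surface": "GRAVEL", "atv": False}))
+    # {'highway': 'track', 'ref:usfs': 'FR 123.1', 'name': 'Gross Dam',
+    #  '4wd_only': 'yes', 'surface': 'gravel', 'atv': 'no'}
+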

The MVUM data can be downloaded here: +https://data.fs.usda.gov/geodata/edw/edw_resources/shp/S_USA.Road_MVUM.zip

    +

    Clipping

    +

To support conflation, even OSM data needs to be chopped into smaller +pieces. While osmium and osmfilter could do this, I've had problems +with the other tools when the task polygon is small. The +osmhighways.py program also clips files. Since it's OSM data, we can't +really use shapely or geopandas, just osmium. It's a bit slow, being +pure Python.
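+
+A stripped-down sketch of that osmium-based approach, using a plain
+bounding box in place of a real task polygon (the file name and the box
+coordinates are made up):
+
+    import osmium  # pyosmium
+
+    # Crude stand-in for a task polygon: (min_lon, min_lat, max_lon, max_lat)
+    BBOX = (-105.6, 39.9, -105.3, 40.1)
+
+    class HighwayClipper(osmium.SimpleHandler):
+        def __init__(self):
+            super().__init__()
+            self.features = []
+
+        def way(self, w):
+            if "highway" not in w.tags:
+                return
+            coords = [(n.location.lon, n.location.lat)
+                      for n in w.nodes if n.location.valid()]
+            # Keep the way if any vertex falls inside the box.
+            if any(BBOX[0] <= x <= BBOX[2] and BBOX[1] <= y <= BBOX[3]
+                   for x, y in coords):
+                self.features.append({"type": "Feature",
+                                      "properties": {t.k: t.v for t in w.tags},
+                                      "geometry": {"type": "LineString",
+                                                   "coordinates": coords}})
+
+    clipper = HighwayClipper()
+    # locations=True makes node coordinates available on each way.
+    clipper.apply_file("colorado-highways.osm.pbf", locations=True)
+    print(len(clipper.features), "highways inside the box")
+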

    + +
    +
    + + + Last update: + October 4, 2024 + + + +
    + + + + + + +
    +
    + + +
    + +
    + +
    + + +
    + +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file diff --git a/search/search_index.json b/search/search_index.json index 71310f8..f33923e 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"OSM Merge","text":"

    Merge features and tags into existing OSM data.

    \ud83d\udcd6 Documentation: https://hotosm.github.io/osm-merge/

    \ud83d\udda5\ufe0f Source Code: https://github.com/hotosm/osm-merge

    "},{"location":"#background","title":"Background","text":"

This is a project for conflating map data, with the ultimate goal of importing it into OpenStreetMap. It is oriented towards processing non-OSM external datasets.

This project uses a huge amount of data (and disk space) if you start from the original nationwide datasets, which are too large to edit. There is a contrib script in the git sources that I use to start breaking down the huge files into manageable pieces.

    The goal of this project is two-fold. One is to support field data collection using OpenDataKit. The osm-fieldwork project can be used to convert the ODK data files into GeoJson and OSM XML. This project then supports conflating that field collected data with current OpenStreetMap. Otherwise this is a time-consuming process to do manually.

The other goal is focused on emergency access in remote areas. This means improving the Motor Vehicle Use Map (MVUM) data for highways (mostly jeep trails) in OpenStreetMap. The current data in OSM was often imported complete with the bugs in the original dataset, or the only detail is highway=track. All of these roads have a US Forest Service reference number and name. Adding those makes it much easier to communicate a location.

    "},{"location":"#programs","title":"Programs","text":""},{"location":"#conflatorpy","title":"conflator.py","text":"

This program doesn't require a database, unlike the other conflation programs. It is focused on conflating OpenDataKit data with OpenStreetMap, as well as conflating rural highways. It'll conflate any two datasets in either GeoJson or OSM XML format. It is currently under heavy development and debugging, processing large amounts of data to track down the obscure bugs in the original datasets and in the conflation process.

    "},{"location":"#the-data","title":"The Data","text":"

Much of the process of conflation is splitting huge datasets into manageable sized files. Since that process is mostly automated, I have a collection of files where I have done that part. Since conflation also requires converting the original datasets, the original files are included, along with the files converted to OSM XML & GeoJson, and the results of conflation. Not all the national forests and parks have been conflated yet, but the data is there for others who may wish to try. The Map Data is on a slow server, sorry. Disk space is cheaper than network bandwidth.

    "},{"location":"CHANGELOG/","title":"Changelog","text":""},{"location":"LICENSE/","title":"GNU AFFERO GENERAL PUBLIC LICENSE","text":"

    Version 3, 19 November 2007

    Copyright (C) 2007 Free Software Foundation, Inc. https://fsf.org/

    Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

    "},{"location":"LICENSE/#preamble","title":"Preamble","text":"

    The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software.

    The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users.

    When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.

    Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software.

    A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public.

    The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version.

    An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license.

    The precise terms and conditions for copying, distribution and modification follow.

    "},{"location":"LICENSE/#terms-and-conditions","title":"TERMS AND CONDITIONS","text":""},{"location":"LICENSE/#0-definitions","title":"0. Definitions.","text":"

    \"This License\" refers to version 3 of the GNU Affero General Public License.

    \"Copyright\" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.

    \"The Program\" refers to any copyrightable work licensed under this License. Each licensee is addressed as \"you\". \"Licensees\" and \"recipients\" may be individuals or organizations.

    To \"modify\" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a \"modified version\" of the earlier work or a work \"based on\" the earlier work.

    A \"covered work\" means either the unmodified Program or a work based on the Program.

    To \"propagate\" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.

    To \"convey\" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.

    An interactive user interface displays \"Appropriate Legal Notices\" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.

    "},{"location":"LICENSE/#1-source-code","title":"1. Source Code.","text":"

    The \"source code\" for a work means the preferred form of the work for making modifications to it. \"Object code\" means any non-source form of a work.

    A \"Standard Interface\" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.

    The \"System Libraries\" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A \"Major Component\", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.

    The \"Corresponding Source\" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.

    The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.

    The Corresponding Source for a work in source code form is that same work.

    "},{"location":"LICENSE/#2-basic-permissions","title":"2. Basic Permissions.","text":"

    All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.

    You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.

    Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary.

    "},{"location":"LICENSE/#3-protecting-users-legal-rights-from-anti-circumvention-law","title":"3. Protecting Users' Legal Rights From Anti-Circumvention Law.","text":"

    No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.

    When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures.

    "},{"location":"LICENSE/#4-conveying-verbatim-copies","title":"4. Conveying Verbatim Copies.","text":"

    You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.

    You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.

    "},{"location":"LICENSE/#5-conveying-modified-source-versions","title":"5. Conveying Modified Source Versions.","text":"

    You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:

    • a) The work must carry prominent notices stating that you modified it, and giving a relevant date.
    • b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to \"keep intact all notices\".
    • c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.
    • d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.

    A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an \"aggregate\" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.

    "},{"location":"LICENSE/#6-conveying-non-source-forms","title":"6. Conveying Non-Source Forms.","text":"

    You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:

    • a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.
    • b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.
    • c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.
    • d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.
    • e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.

    A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.

    A \"User Product\" is either (1) a \"consumer product\", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, \"normally used\" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.

    \"Installation Information\" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.

    If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).

    The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.

    Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.

    "},{"location":"LICENSE/#7-additional-terms","title":"7. Additional Terms.","text":"

    \"Additional permissions\" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.

    When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.

    Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:

    • a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or
    • b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or
    • c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or
    • d) Limiting the use for publicity purposes of names of licensors or authors of the material; or
    • e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or
    • f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.

    All other non-permissive additional terms are considered \"further restrictions\" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.

    If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.

    Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.

    "},{"location":"LICENSE/#8-termination","title":"8. Termination.","text":"

    You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).

    However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.

    Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.

    Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.

    "},{"location":"LICENSE/#9-acceptance-not-required-for-having-copies","title":"9. Acceptance Not Required for Having Copies.","text":"

    You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.

    "},{"location":"LICENSE/#10-automatic-licensing-of-downstream-recipients","title":"10. Automatic Licensing of Downstream Recipients.","text":"

    Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.

    An \"entity transaction\" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.

    You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.

    "},{"location":"LICENSE/#11-patents","title":"11. Patents.","text":"

    A \"contributor\" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's \"contributor version\".

    A contributor's \"essential patent claims\" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, \"control\" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.

    Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.

    In the following three paragraphs, a \"patent license\" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To \"grant\" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.

    If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. \"Knowingly relying\" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.

    If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.

    A patent license is \"discriminatory\" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.

    Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.

    "},{"location":"LICENSE/#12-no-surrender-of-others-freedom","title":"12. No Surrender of Others' Freedom.","text":"

    If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.

    "},{"location":"LICENSE/#13-remote-network-interaction-use-with-the-gnu-general-public-license","title":"13. Remote Network Interaction; Use with the GNU General Public License.","text":"

    Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph.

    Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License.

    "},{"location":"LICENSE/#14-revised-versions-of-this-license","title":"14. Revised Versions of this License.","text":"

    The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.

    Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License \"or any later version\" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation.

    If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program.

    Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.

    "},{"location":"LICENSE/#15-disclaimer-of-warranty","title":"15. Disclaimer of Warranty.","text":"

    THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

    "},{"location":"LICENSE/#16-limitation-of-liability","title":"16. Limitation of Liability.","text":"

    IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

    "},{"location":"LICENSE/#17-interpretation-of-sections-15-and-16","title":"17. Interpretation of Sections 15 and 16.","text":"

    If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.

    END OF TERMS AND CONDITIONS

    "},{"location":"LICENSE/#how-to-apply-these-terms-to-your-new-programs","title":"How to Apply These Terms to Your New Programs","text":"

    If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

    To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the \"copyright\" line and a pointer to where the full notice is found.

        <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU Affero General Public License as\n    published by the Free Software Foundation, either version 3 of the\n    License, or (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU Affero General Public License for more details.\n\n    You should have received a copy of the GNU Affero General Public License\n    along with this program.  If not, see <https://www.gnu.org/licenses/>.\n

    Also add information on how to contact you by electronic and paper mail.

    If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a \"Source\" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements.

    You should also get your employer (if you work as a programmer) or school, if any, to sign a \"copyright disclaimer\" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see https://www.gnu.org/licenses/.

    "},{"location":"about/","title":"Conflator","text":"

    This is a project for conflating map data, with the ultimate goal of importing it into OpenStreetMap(OSM).

It is oriented towards conflating external datasets with existing OSM data. External data is usually polygons (building footprints), or POIs. These days there are multiple publicly available building footprint datasets with an appropriate license for OSM. The problem is that this data needs to be validated.

Due to the flexibility of the OSM data schema, it's impossible to get 100% perfect conflation. But purely manual conflation is very time-consuming and tedious. This project aims to do as much as possible automatically, to make the validator's work as efficient as possible.

    "},{"location":"api/","title":"API Docs for conflator","text":""},{"location":"api/#conflatorpy","title":"conflator.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default uri str

    URI for the primary database

    None boundary str

    Boundary to limit SQL queries

    None

    Returns:

    Type Description Conflator

    An instance of this object

    Source code in osm_merge/conflator.py
    def __init__(self,\n             uri: str = None,\n             boundary: str = None\n             ):\n    \"\"\"\n    Initialize Input data sources.\n\n    Args:\n        uri (str): URI for the primary database\n        boundary (str, optional): Boundary to limit SQL queries\n\n    Returns:\n        (Conflator): An instance of this object\n    \"\"\"\n    self.postgres = list()\n    self.tags = dict()\n    self.boundary = boundary\n    self.dburi = uri\n    self.primary = None\n    if boundary:\n        infile = open(boundary, 'r')\n        self.boundary = geojson.load(infile)\n        infile.close()\n    # Distance in meters for conflating with postgis\n    self.tolerance = 7\n    self.data = dict()\n    self.analyze = (\"building\", \"name\", \"amenity\", \"landuse\", \"cuisine\", \"tourism\", \"leisure\")\n

    options: show_source: false heading_level: 3

    "},{"location":"api/#osm_merge.conflator.Conflator.getDistance","title":"getDistance","text":"
    getDistance(newdata, olddata)\n

    Compute the distance between two features in meters

    Parameters:

    Name Type Description Default newdata Feature

    A feature from the external dataset

    required olddata Feature

    A feature from the existing OSM dataset

    required

    Returns:

    Type Description float

    The distance between the two features

    Source code in osm_merge/conflator.py
    def getDistance(self,\n        newdata: Feature,\n        olddata: Feature,\n        ) -> float:\n    \"\"\"\n    Compute the distance between two features in meters\n\n    Args:\n        newdata (Feature): A feature from the external dataset\n        olddata (Feature): A feature from the existing OSM dataset\n\n    Returns:\n        (float): The distance between the two features\n    \"\"\"\n    # timer = Timer(text=\"getDistance() took {seconds:.0f}s\")\n    # timer.start()\n    # dist = shapely.hausdorff_distance(center, wkt)\n    dist = float()\n\n    # Transform so the results are in meters instead of degress of the\n    # earth's radius.\n    project = pyproj.Transformer.from_proj(\n        pyproj.Proj(init='epsg:4326'),\n        pyproj.Proj(init='epsg:3857')\n        )\n    newobj = transform(project.transform, shape(newdata[\"geometry\"]))\n    oldobj = transform(project.transform, shape(olddata[\"geometry\"]))\n\n    # FIXME: we shouldn't ever get here...\n    if oldobj.type == \"MultiLineString\":\n        log.error(f\"MultiLineString unsupported!\")\n\n    if newobj.type == \"MultiLineString\":\n        lines = newobj.geoms\n    elif newobj.type == \"GeometryCollection\":\n        lines = newobj.geoms\n    else:\n        lines = MultiLineString([newobj]).geoms\n\n    # dists = list()\n    best = None\n    for segment in lines:\n        if oldobj.geom_type == \"LineString\" and segment.geom_type == \"LineString\":\n            # Compare two highways\n            if oldobj.within(segment):\n                log.debug(f\"CONTAINS\")\n            dist = segment.distance(oldobj)\n        elif oldobj.geom_type == \"Point\" and segment.geom_type == \"LineString\":\n            # We only want to compare LineStrings, so force the distance check\n            # to be False\n            dist = 12345678.9\n        elif oldobj.geom_type == \"Point\" and segment.geom_type == \"Point\":\n            dist = segment.distance(oldobj)\n        elif oldobj.geom_type == \"Polygon\" and segment.geom_type == \"Polygon\":\n            # compare two buildings\n            pass\n        elif oldobj.geom_type == \"Polygon\" and segment.geom_type == \"Point\":\n            # Compare a point with a building, used for ODK Collect data\n            center = shapely.centroid(oldobj)\n            dist = segment.distance(center)\n        elif oldobj.geom_type == \"Point\" and segment.geom_type == \"LineString\":\n            dist = segment.distance(oldobj)\n        elif oldobj.geom_type == \"LineString\" and segment.geom_type == \"Point\":\n            dist = segment.distance(oldobj)\n\n        # Find the closest segment\n        if best is None:\n            best = dist\n        elif dist < best:\n            # log.debug(f\"BEST: {best} < {dist}\")\n            best = dist\n\n    # timer.stop()\n    return best # dist # best\n
    "},{"location":"api/#osm_merge.conflator.Conflator.checkTags","title":"checkTags","text":"
    checkTags(extfeat, osm)\n

    Check tags between 2 features.

    Parameters:

    Name Type Description Default extfeat Feature

    The feature from the external dataset

    required osm Feature

    The result of the SQL query

    required

    Returns:

    Type Description int

    The number of tag matches

    dict

    The updated tags

    Source code in osm_merge/conflator.py
    def checkTags(self,\n              extfeat: Feature,\n              osm: Feature,\n               ):\n    \"\"\"\n    Check tags between 2 features.\n\n    Args:\n        extfeat (Feature): The feature from the external dataset\n        osm (Feature): The result of the SQL query\n\n    Returns:\n        (int): The number of tag matches\n        (dict): The updated tags\n    \"\"\"\n    match_threshold = 85\n    match = [\"name\", \"ref\", \"ref:usfs\"]\n    hits = 0\n    props = dict()\n    id = 0\n    version = 0\n    props = extfeat['properties'] | osm['properties']\n    # ODK Collect adds these two tags we don't need.\n    if \"title\" in props:\n        del props[\"title\"]\n    if \"label\" in props:\n        del props[\"label\"]\n\n    if \"id\" in props:\n        # External data not from an OSM source always has\n        # negative IDs to distinguish it from current OSM data.\n        id = int(props[\"id\"])\n    else:\n        id -= 1\n        props[\"id\"] = id\n\n    if \"version\" in props:\n        # Always use the OSM version if it exists, since it gets\n        # incremented so JOSM see it's been modified.\n        props[\"version\"] = int(version)\n        # Name may also be name:en, name:np, etc... There may also be\n        # multiple name:* values in the tags.\n    else:\n        props[\"version\"] = 1\n\n    for key in match:\n        if \"highway\" in osm[\"properties\"]:\n            # Always use the value in the secondary, which is\n            # likely OSM.\n            props[\"highway\"] = osm[\"properties\"][\"highway\"]\n        if key not in props:\n            continue\n\n        # Usually it's the name field that has the most variety in\n        # in trying to match strings. This often is differences in\n        # capitalization, singular vs plural, and typos from using\n        # your phone to enter the name. Course names also change\n        # too so if it isn't a match, use the new name from the\n        # external dataset.\n        if key in osm[\"properties\"] and key in extfeat[\"properties\"]:\n            # Sometimes there will be a word match, which returns a\n            # ratio in the low 80s. In that case they should be\n            # a similar length.\n            length = len(extfeat[\"properties\"][key]) - len(osm[\"properties\"][key])\n            ratio = fuzz.ratio(extfeat[\"properties\"][key].lower(), osm[\"properties\"][key].lower())\n            if ratio > match_threshold and length <= 3:\n                hits += 1\n                props[\"ratio\"] = ratio\n                props[key] = extfeat[\"properties\"][key]\n                if ratio != 100:\n                    # Often the only difference is using FR or FS as the\n                    # prefix. 
In that case, see if the ref matches.\n                    if key[:3] == \"ref\":\n                        # This assume all the data has been converted\n                        # by one of the utility programs, which enfore\n                        # using the ref:usfs tag.\n                        tmp = extfeat[\"properties\"][\"ref:usfs\"].split(' ')\n                        extref = tmp[1].upper()\n                        tmp = osm[\"properties\"][\"ref:usfs\"].split(' ')\n                        newref = tmp[1].upper()\n                        # log.debug(f\"REFS: {extref} vs {newref}: {extref == newref}\")\n                        if extref == newref:\n                            hits += 1\n                            # Many minor changes of FS to FR don't require\n                            # caching the exising value as it's only the\n                            # prefix that changed. It always stayes in this\n                            # range.\n                            if osm[\"properties\"][\"ref:usfs\"][:3] == \"FS \" and ratio > 80 and ratio < 90:\n                                # log.debug(f\"Ignoring old ref {osm[\"properties\"][\"ref:usfs\"]}\")\n                                continue\n                    # For a fuzzy match, cache the value from the\n                    # secondary dataset and use the value in the\n                    # primary dataset.\n                    props[f\"old_{key}\"] = osm[\"properties\"][key]\n\n    # print(props)\n    return hits, props\n
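    A hedged sketch showing a fuzzy name and ref:usfs match; the values are invented to illustrate the FS/FR prefix case described above:

        from geojson import Feature, Point
        from osm_merge.conflator import Conflator

        conflator = Conflator()
        ext = Feature(geometry=Point((-105.0, 40.0)),
                      properties={"ref:usfs": "FR 123.1", "name": "Bear Creek Road"})
        osm = Feature(geometry=Point((-105.0, 40.0)),
                      properties={"ref:usfs": "FS 123.1", "name": "Bear Creek Rd",
                                  "highway": "track", "version": 2})
        # hits counts the matching tags; props holds the merged, updated tags.
        hits, props = conflator.checkTags(ext, osm)
        print(hits, props["highway"])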
    "},{"location":"api/#osm_merge.conflator.Conflator.loadFile","title":"loadFile","text":"
    loadFile(osmfile)\n

    Read an OSM XML file and convert it to GeoJson for consistency.

    Parameters:

    Name Type Description Default osmfile str

    The OSM XML file to load

    required

    Returns:

    Type Description list

    The entries in the OSM XML file

    Source code in osm_merge/conflator.py
    def loadFile(\n    self,\n    osmfile: str,\n) -> list:\n    \"\"\"\n    Read a OSM XML file and convert it to GeoJson for consistency.\n\n    Args:\n        osmfile (str): The OSM XML file to load\n\n    Returns:\n        (list): The entries in the OSM XML file\n    \"\"\"\n    alldata = list()\n    size = os.path.getsize(osmfile)\n    with open(osmfile, \"r\") as file:\n        xml = file.read(size)\n        doc = xmltodict.parse(xml)\n        if \"osm\" not in doc:\n            logging.warning(\"No data in this instance\")\n            return False\n        data = doc[\"osm\"]\n        if \"node\" not in data:\n            logging.warning(\"No nodes in this instance\")\n            return False\n\n    nodes = dict()\n    for node in data[\"node\"]:\n        properties = {\n            \"id\": int(node[\"@id\"]),\n        }\n        if \"@version\" not in node:\n            properties[\"version\"] = 1\n        else:\n            properties[\"version\"] = node[\"@version\"]\n\n        if \"@timestamp\" in node:\n            properties[\"timestamp\"] = node[\"@timestamp\"]\n\n        if \"tag\" in node:\n            for tag in node[\"tag\"]:\n                if type(tag) == dict:\n                    # Drop all the TIGER tags based on\n                    # https://wiki.openstreetmap.org/wiki/TIGER_fixup\n                    if tag[\"@k\"] in properties:\n                        if properties[tag[\"@k\"]][:7] == \"tiger:\":\n                            continue\n                    properties[tag[\"@k\"]] = tag[\"@v\"].strip()\n                    # continue\n                else:\n                    properties[node[\"tag\"][\"@k\"]] = node[\"tag\"][\"@v\"].strip()\n                # continue\n        geom = Point((float(node[\"@lon\"]), float(node[\"@lat\"])))\n        # cache the nodes so we can dereference the refs into\n        # coordinates, but we don't need them in GeoJson format.\n        nodes[properties[\"id\"]] = geom\n        if len(properties) > 2:\n            alldata.append(Feature(geometry=geom, properties=properties))\n\n    for way in data[\"way\"]:\n        attrs = dict()\n        properties = {\n            \"id\": int(way[\"@id\"]),\n        }\n        refs = list()\n        if \"nd\" in way:\n            if len(way[\"nd\"]) > 0:\n                for ref in way[\"nd\"]:\n                    refs.append(int(ref[\"@ref\"]))\n            properties[\"refs\"] = refs\n\n        if \"@version\" not in node:\n            properties[\"version\"] = 1\n        else:\n            properties[\"version\"] = node[\"@version\"]\n\n        if \"@timestamp\" in node:\n            attrs[\"timestamp\"] = node[\"@timestamp\"]\n\n        if \"tag\" in way:\n            for tag in way[\"tag\"]:\n                if type(tag) == dict:\n                    properties[tag[\"@k\"]] = tag[\"@v\"].strip()\n                    # continue\n                else:\n                    properties[way[\"tag\"][\"@k\"]] = way[\"tag\"][\"@v\"].strip()\n                # continue\n        # geom =\n        tmp = list()\n        for ref in refs:\n            tmp.append(nodes[ref]['coordinates'])\n        geom = LineString(tmp)\n        if geom is None:\n            breakpoint()\n        log.debug(f\"WAY: {properties}\")\n        alldata.append(Feature(geometry=geom, properties=properties))\n\n    return alldata\n
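    A hedged sketch; "highways.osm" is a hypothetical OSM XML extract:

        from osm_merge.conflator import Conflator

        conflator = Conflator()
        features = conflator.loadFile("highways.osm")
        print(f"Loaded {len(features)} features as GeoJson")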
    "},{"location":"api/#osm_merge.conflator.Conflator.initInputDB","title":"initInputDB async","text":"
    initInputDB(config=None, dburi=None)\n

    When running async, we can't initialize the async database connection in the constructor, so it has to be done as an extra step.

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    None

    Returns: (bool): Whether it initialized successfully

    Source code in osm_merge/conflator.py
    async def initInputDB(self,\n                    config: str = None,\n                    dburi: str = None,\n                    ) -> bool:\n    \"\"\"\n    When async, we can't initialize the async database connection,\n    so it has to be done as an extrat step.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n    Returns:\n        (bool): Whether it initialiized\n    \"\"\"\n    db = GeoSupport(dburi, config)\n    await db.initialize()\n    self.postgres.append(db)\n\n    return True\n
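    Since the method is async it has to be awaited; a hedged sketch with a hypothetical database URI:

        import asyncio
        from osm_merge.conflator import Conflator

        async def main():
            conflator = Conflator()
            ok = await conflator.initInputDB(dburi="localhost/osm")
            print("input database ready" if ok else "initialization failed")

        asyncio.run(main())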
    "},{"location":"api/#osm_merge.conflator.Conflator.initOutputDB","title":"initOutputDB async","text":"
    initOutputDB(dburi=None)\n

    When running async, we can't initialize the async database connection in the constructor, so it has to be done as an extra step.

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    required Source code in osm_merge/conflator.py
    async def initOutputDB(self,\n                    dburi: str = None,\n                    ):\n    \"\"\"\n    When async, we can't initialize the async database connection,\n    so it has to be done as an extrat step.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n    \"\"\"\n    if dburi:\n        self.dburi = dburi\n        await self.createDBThreads(dburi, config)\n    elif self.dburi:\n        await self.createDBThreads(self.dburi, config)\n
    "},{"location":"api/#osm_merge.conflator.Conflator.createDBThreads","title":"createDBThreads async","text":"
    createDBThreads(uri=None, config=None, execs=cores)\n

    Create threads for writing to the primary database to avoid corrupting data.

    Parameters:

    Name Type Description Default uri str

    URI for the primary database

    None config str

    The config file from the osm-rawdata project

    None threads int

    The number of threads to create

    required

    Returns:

    Type Description bool

    Whether the threads were created successfully

    Source code in osm_merge/conflator.py
    async def createDBThreads(self,\n                    uri: str = None,\n                    config: str = None,\n                    execs: int = cores,\n                    ) -> bool:\n    \"\"\"\n    Create threads for writting to the primary datatbase to avoid\n    problems with corrupting data.\n\n    Args:\n        uri (str): URI for the primary database\n        config (str, optional): The config file from the osm-rawdata project\n        threads (int, optional): The number of threads to create\n\n    Returns:\n        (bool): Whether the threads were created sucessfully\n    \"\"\"\n    # Each thread needs it's own connection to postgres to avoid problems\n    # when inserting or updating the primary database.\n    if uri:\n        for thread in range(0, execs + 1):\n            db = GeoSupport(uri)\n            await db.initialize(uri, config)\n            if not db:\n                return False\n            self.postgres.append(db)\n        if self.boundary:\n            if 'features' in self.boundary:\n                poly = self.boundary[\"features\"][0][\"geometry\"]\n            else:\n                poly = shape(self.boundary['geometry'])\n\n            # FIXME: we only need to clip once to create the view, this is not\n            # confirmed yet.\n            await db.clipDB(poly, self.postgres[0])\n\n        return True\n
    "},{"location":"api/#osm_merge.conflator.Conflator.conflateData","title":"conflateData async","text":"
    conflateData(odkspec, osmspec, threshold=3.0)\n

    Open the two source files and conflate them.

    Parameters:

    Name Type Description Default odkspec str

    The external data uri

    required osmspec str

    The existing OSM data uri

    required threshold float

    Threshold for distance calculations in meters

    3.0

    Returns:

    Type Description list

    The conflated output

    Source code in osm_merge/conflator.py
    async def conflateData(self,\n                odkspec: str,\n                osmspec: str,\n                threshold: float = 3.0,\n                ) -> list:\n    \"\"\"\n    Open the two source files and contlate them.\n\n    Args:\n        odkspec (str): The external data uri\n        osmspec (str): The existing OSM data uri\n        threshold (float): Threshold for distance calculations in meters\n\n    Returns:\n        (list):  The conflated output\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n    odkdata = list()\n    osmdata = list()\n\n    result = list()\n    if odkspec[:3].lower() == \"pg:\":\n        db = GeoSupport(odkspec[3:])\n        result = await db.queryDB()\n    else:\n        odkdata = self.parseFile(odkspec)\n\n    if osmspec[:3].lower() == \"pg:\":\n        db = GeoSupport(osmspec[3:])\n        result = await db.queryDB()\n    else:\n        osmdata = self.parseFile(osmspec)\n\n    entries = len(odkdata)\n    chunk = round(entries / cores)\n\n    alldata = list()\n    tasks = list()\n\n    log.info(f\"The primary dataset has {len(odkdata)} entries\")\n    log.info(f\"The secondary dataset has {len(osmdata)} entries\")\n\n    # Make threading optional for easier debugging\n    single = True # False\n\n    if single:\n        alldata = conflateThread(odkdata, osmdata)\n    else:\n        futures = list()\n        with concurrent.futures.ProcessPoolExecutor(max_workers=cores) as executor:\n            for block in range(0, entries, chunk):\n                future = executor.submit(conflateThread,\n                        odkdata[block:block + chunk - 1],\n                        osmdata\n                        )\n                futures.append(future)\n            #for thread in concurrent.futures.wait(futures, return_when='ALL_COMPLETED'):\n            for future in concurrent.futures.as_completed(futures):\n                log.debug(f\"Waiting for thread to complete..\")\n                alldata += future.result()\n\n        executor.shutdown()\n\n    timer.stop()\n\n    return alldata\n
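    A hedged end-to-end sketch; the file names are hypothetical, and the external dataset is assumed to have already been converted by the utility programs:

        import asyncio
        from osm_merge.conflator import Conflator

        async def main():
            conflator = Conflator()
            # Primary (external) data first, existing OSM data second.
            merged = await conflator.conflateData("mvum-roads.geojson",
                                                  "osm-highways.osm",
                                                  threshold=3.0)
            conflator.writeOSM(merged, "conflated.osm")

        asyncio.run(main())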
    "},{"location":"api/#osm_merge.conflator.Conflator.dump","title":"dump","text":"
    dump()\n

    Dump internal data for debugging.

    Source code in osm_merge/conflator.py
    def dump(self):\n    \"\"\"\n    Dump internal data for debugging.\n    \"\"\"\n    print(f\"Data source is: {self.dburi}\")\n    print(f\"There are {len(self.data)} existing features\")\n
    "},{"location":"api/#osm_merge.conflator.Conflator.parseFile","title":"parseFile","text":"
    parseFile(filespec)\n

    Parse the input file based on its format.

    Parameters:

    Name Type Description Default filespec str

    The file to parse

    required

    Returns:

    Type Description list

    The parsed data from the file

    Source code in osm_merge/conflator.py
    def parseFile(self,\n            filespec: str,\n            ) ->list:\n    \"\"\"\n    Parse the input file based on it's format.\n\n    Args:\n        filespec (str): The file to parse\n\n    Returns:\n        (list): The parsed data from the file\n    \"\"\"\n    odkpath = Path(filespec)\n    odkdata = list()\n    if odkpath.suffix == '.geojson':\n        # FIXME: This should also work for any GeoJson file, not\n        # only ODK ones, but this has yet to be tested.\n        log.debug(f\"Parsing GeoJson files {odkpath}\")\n        odkfile = open(odkpath, 'r')\n        features = geojson.load(odkfile)\n        odkdata = features['features']\n    elif odkpath.suffix == '.osm':\n        log.debug(f\"Parsing OSM XML files {odkpath}\")\n        osmfile = OsmFile()\n        odkdata = self.loadFile(odkpath)\n    elif odkpath.suffix == \".csv\":\n        log.debug(f\"Parsing csv files {odkpath}\")\n        odk = ODKParsers()\n        for entry in odk.CSVparser(odkpath):\n            odkdata.append(odk.createEntry(entry))\n    elif odkpath.suffix == \".json\":\n        log.debug(f\"Parsing json files {odkpath}\")\n        odk = ODKParsers()\n        for entry in odk.JSONparser(odkpath):\n            odkdata.append(odk.createEntry(entry))\n    return odkdata\n
    "},{"location":"api/#osm_merge.conflator.Conflator.conflateDB","title":"conflateDB","text":"
    conflateDB(source)\n

    Conflate all the data. This is the primary interface for conflation.

    Parameters:

    Name Type Description Default source str

    The source file to conflate

    required

    Returns:

    Type Description dict

    The conflated features

    Source code in osm_merge/conflator.py
    def conflateDB(self,\n                 source: str,\n                 ) -> dict:\n    \"\"\"\n    Conflate all the data. This the primary interfacte for conflation.\n\n    Args:\n        source (str): The source file to conflate\n\n    Returns:\n        (dict):  The conflated features\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n\n    log.info(\"Opening data file: %s\" % source)\n    toplevel = Path(source)\n    if toplevel.suffix == \".geosjon\":\n        src = open(source, \"r\")\n        self.data = geojson.load(src)\n    elif toplevel.suffix == \".osm\":\n        src = open(source, \"r\")\n        osmin = OsmFile()\n        self.data = osmin.loadFile(source) # input file\n        if self.boundary:\n            gs = GeoSupport(source)\n            # self.data = gs.clipFile(self.data)\n\n    # Use fuzzy string matching to handle minor issues in the name column,\n    # which is often used to match an amenity.\n    if len(self.data) == 0:\n        self.postgres[0].query(\"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch\")\n    # log.debug(f\"OdkMerge::conflateData() called! {len(odkdata)} features\")\n\n    # A chunk is a group of threads\n    chunk = round(len(self.data) / cores)\n\n    # cycle = range(0, len(odkdata), chunk)\n\n    # Chop the data into a subset for each thread\n    newdata = list()\n    future = None\n    result = None\n    index = 0\n    if True:                # DEBUGGING HACK ALERT!\n        result = conflateThread(self.data, self, index)\n        return dict()\n\n    with concurrent.futures.ThreadPoolExecutor(max_workers=cores) as executor:\n        i = 0\n        subset = dict()\n        futures = list()\n        for key, value in self.data.items():\n            subset[key] = value\n            if i == chunk:\n                i = 0\n                result = executor.submit(conflateThread, subset, self, index)\n                index += 1\n                # result.add_done_callback(callback)\n                futures.append(result)\n                subset = dict()\n            i += 1\n        for future in concurrent.futures.as_completed(futures):\n        # # for future in concurrent.futures.wait(futures, return_when='ALL_COMPLETED'):\n            log.debug(f\"Waiting for thread to complete..\")\n            # print(f\"YYEESS!! {future.result(timeout=10)}\")\n            newdata.append(future.result(timeout=5))\n    timer.stop()\n    return newdata\n
    "},{"location":"api/#osm_merge.conflator.Conflator.writeOSM","title":"writeOSM","text":"
    writeOSM(data, filespec)\n

    Write the data to an OSM XML file.

    Parameters:

    Name Type Description Default data list

    The list of GeoJson features

    required filespec str

    The output file name

    required Source code in osm_merge/conflator.py
    def writeOSM(self,\n             data: list,\n             filespec: str,\n             ):\n    \"\"\"\n    Write the data to an OSM XML file.\n\n    Args:\n        data (list): The list of GeoJson features\n        filespec (str): The output file name\n    \"\"\"\n    osm = OsmFile(filespec)\n    negid = -100\n    id = -1\n    out = str()\n    for entry in data:\n        version = 1\n        tags = entry[\"properties\"]\n        if \"osm_id\" in tags:\n            id = tags[\"osm_id\"]\n        elif \"id\" in tags:\n            id = tags[\"id\"]\n        elif \"id\" not in tags:\n            # There is no id or version for non OSM features\n            id -= 1\n        if \"version\" in entry[\"properties\"]:\n            version = int(entry[\"properties\"][\"version\"])\n            version += 1\n        if id == 814085818:\n            breakpoint()\n        attrs = {\"id\": id, \"version\": version}\n        # These are OSM attributes, not tags\n        if \"id\" in tags:\n            del tags[\"id\"]\n        if \"version\" in tags:\n            del tags[\"version\"]\n        item = {\"attrs\": attrs, \"tags\": tags}\n        # if entry[\"geometry\"][\"type\"] == \"LineString\" or entry[\"geometry\"][\"type\"] == \"Polygon\":\n        # print(entry)\n        out = str()\n        if entry[\"geometry\"] is not None and entry[\"geometry\"][\"type\"] == \"Point\":\n            # It's a node referenced by a way\n            item[\"attrs\"][\"lon\"] = entry[\"geometry\"][\"coordinates\"][0]\n            item[\"attrs\"][\"lat\"] = entry[\"geometry\"][\"coordinates\"][1]\n            if \"timestamp\" in item[\"tags\"]:\n                item[\"attrs\"][\"timestamp\"] = item[\"tags\"][\"timestamp\"]\n                del item[\"tags\"][\"timestamp\"]\n            # referenced nodes should have no tags\n            del item[\"tags\"]\n            # FIXME: determine if we need to write nodes\n            # out = osm.createNode(item, False)\n            continue\n        else:\n            # OSM ways don't have a geometry, just references to node IDs.\n            # The OSM XML file won't have any nodes, so at first won't\n            # display in JOSM until you do a File->\"Update modified\",\n            if \"refs\" not in tags:\n                log.error(f\"No Refs: {tags}\")\n                continue\n                # breakpoint()\n            if len(tags['refs']) > 0:\n                if type(tags[\"refs\"]) != list:\n                    item[\"refs\"] = eval(tags[\"refs\"])\n                else:\n                    item[\"refs\"] = tags[\"refs\"]\n                del tags[\"refs\"]\n                out = osm.createWay(item, True)\n        if len(out) > 0:\n            osm.write(out)\n
    "},{"location":"api/#osm_merge.conflator.Conflator.writeGeoJson","title":"writeGeoJson","text":"
    writeGeoJson(data, filespec)\n

    Write the data to a GeoJson file.

    Parameters:

    Name Type Description Default data dict

    The list of GeoJson features

    required filespec str

    The output file name

    required Source code in osm_merge/conflator.py
    def writeGeoJson(self,\n             data: dict,\n             filespec: str,\n             ):\n    \"\"\"\n    Write the data to a GeoJson file.\n\n    Args:\n        data (dict): The list of GeoJson features\n        filespec (str): The output file name\n    \"\"\"\n    file = open(filespec, \"w\")\n    fc = FeatureCollection(data)\n    geojson.dump(fc, file, indent=4)\n
    "},{"location":"api/#osm_merge.conflator.Conflator.osmToFeature","title":"osmToFeature","text":"
    osmToFeature(osm)\n

    Convert an entry from an OSM XML file with attrs and tags into a GeoJson Feature.

    Parameters:

    Name Type Description Default osm dict

    The OSM entry

    required

    Returns:

    Type Description Feature

    A GeoJson feature

    Source code in osm_merge/conflator.py
    def osmToFeature(self,\n                 osm: dict(),\n                 ) -> Feature:\n    \"\"\"\n    Convert an entry from an OSM XML file with attrs and tags into\n    a GeoJson Feature.\n\n    Args:\n        osm (dict): The OSM entry\n\n    Returns:\n        (Feature): A GeoJson feature\n    \"\"\"\n    if \"attrs\" not in osm:\n        return Feature(geometry=shape(osm[\"geometry\"]), properties=osm[\"properties\"])\n\n    if \"osm_id\" in osm[\"attrs\"]:\n        id = osm[\"attrs\"][\"osm_id\"]\n    elif \"id\" in osm[\"attrs\"]:\n        id = osm[\"attrs\"][\"id\"]\n    props = {\"id\": id}\n    if \"version\" in osm[\"attrs\"]:\n        props[\"version\"] = osm[\"attrs\"][\"version\"]\n\n    props.update(osm[\"tags\"])\n    # It's a way, so no coordinate\n    if \"refs\" in osm:\n        return Feature(properties=props)\n    else:\n        geom = Point((float(osm[\"attrs\"][\"lon\"]), float(osm[\"attrs\"][\"lat\"])))\n\n        return Feature(geometry=geom, properties=props)\n
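    A hedged sketch; the entry mirrors the attrs/tags layout used elsewhere in this class, with invented values:

        from osm_merge.conflator import Conflator

        conflator = Conflator()
        entry = {
            "attrs": {"id": 123456, "version": 2, "lat": "40.0", "lon": "-105.0"},
            "tags": {"highway": "track", "ref:usfs": "FS 123.1"},
        }
        feature = conflator.osmToFeature(entry)
        print(feature["properties"]["id"], feature["geometry"]["type"])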
    "},{"location":"api/#conflatebuildingspy","title":"conflateBuildings.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default dburi str

    The DB URI

    None boundary Polygon

    The AOI of the project

    None

    Returns:

    Type Description ConflateDB

    An instance of this object

    Source code in osm_merge/conflateBuildings.py
    def __init__(\n    self,\n    dburi: str = None,\n    boundary: Polygon = None,\n):\n    \"\"\"This class conflates data that has been imported into a postgres\n    database using the Underpass raw data schema.\n\n    Args:\n        dburi (str): The DB URI\n        boundary (Polygon): The AOI of the project\n\n    Returns:\n        (ConflateDB): An instance of this object\n    \"\"\"\n    self.postgres = list()\n    self.uri = None\n    if dburi:\n        self.uri = uriParser(dburi)\n        self.db = GeoSupport(dburi)\n    self.boundary = boundary\n    self.view = \"ways_poly\"\n    self.filter = list()\n


    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.addSourceFilter","title":"addSourceFilter","text":"
    addSourceFilter(source)\n

    Add a dataset to the list of suspect (bad) source datasets

    Source code in osm_merge/conflateBuildings.py
    def addSourceFilter(\n    self,\n    source: str,\n):\n    \"\"\"Add to a list of suspect bad source datasets\"\"\"\n    self.filter.append(source)\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.overlapDB","title":"overlapDB","text":"
    overlapDB(dburi)\n

    Conflate buildings where all the data is in the same postgres database using the Underpass raw data schema.

    Parameters:

    Name Type Description Default dburi str

    The URI for the existing OSM data

    required

    This is not fast for large areas!

    Source code in osm_merge/conflateBuildings.py
    def overlapDB(\n    self,\n    dburi: str,\n):\n    \"\"\"Conflate buildings where all the data is in the same postgres database\n    using the Underpass raw data schema.\n\n    Args:\n        dburi (str): The URI for the existing OSM data\n\n    This is not fast for large areas!\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n    # Find duplicate buildings in the same database\n    # sql = f\"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)\"\n    # sql = \"SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1\"\n    # Make a new postgres VIEW of all overlapping or touching buildings\n    # log.info(f\"Looking for overlapping buildings in \\\"{self.uri['dbname']}\\\", this make take awhile...\")\n    # print(sql)\n    # Views must be dropped in the right order\n    sql = (\n        \"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;\"\n    )\n    result = self.db.queryDB(sql)\n\n    if self.boundary:\n        self.db.clipDB(self.boundary)\n\n    log.debug(\"Clipping OSM database\")\n    ewkt = shape(self.boundary)\n    uri = uriParser(dburi)\n    log.debug(f\"Extracting OSM subset from \\\"{uri['dbname']}\\\"\")\n    sql = f\"CREATE TABLE osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom) AND tags->>'building' IS NOT NULL\"\n    # print(sql)\n    result = self.db.queryDB(sql)\n\n    sql = \"CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL\"\n    print(sql)\n    result = self.db.queryDB(sql)\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.cleanDuplicates","title":"cleanDuplicates","text":"
    cleanDuplicates()\n

    Delete the entries from the duplicate building view.

    Returns:

    Type Description FeatureCollection

    The entries from the database table

    Source code in osm_merge/conflateBuildings.py
    def cleanDuplicates(self):\n    \"\"\"Delete the entries from the duplicate building view.\n\n    Returns:\n        (FeatureCollection): The entries from the datbase table\n    \"\"\"\n    log.debug(\"Removing duplicate buildings from ways_view\")\n    sql = \"DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)\"\n\n    result = self.db.queryDB(sql)\n    return True\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.getNew","title":"getNew","text":"
    getNew()\n

    Get only the new buildings

    Returns:

    Type Description FeatureCollection

    The entries from the database table

    Source code in osm_merge/conflateBuildings.py
    def getNew(self):\n    \"\"\"Get only the new buildings\n\n    Returns:\n        (FeatureCollection): The entries from the datbase table\n    \"\"\"\n    sql = \"SELECT osm_id,geom,tags FROM ways_view\"\n    result = self.db.queryDB(sql)\n    features = list()\n    for item in result:\n        # log.debug(item)\n        entry = {\"osm_id\": item[0]}\n        entry.update(item[2])\n        geom = wkb.loads(item[1])\n        features.append(Feature(geometry=geom, properties=entry))\n\n    log.debug(f\"{len(features)} new features found\")\n    return FeatureCollection(features)\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.findHighway","title":"findHighway","text":"
    findHighway(feature)\n

    Find the nearest highway to a feature

    Parameters:

    Name Type Description Default feature Feature

    The feature to check against

    required Source code in osm_merge/conflateBuildings.py
    def findHighway(\n    self,\n    feature: Feature,\n):\n    \"\"\"Find the nearest highway to a feature\n\n    Args:\n        feature (Feature): The feature to check against\n    \"\"\"\n    pass\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.getDuplicates","title":"getDuplicates","text":"
    getDuplicates()\n

    Get the entries from the duplicate building view.

    Returns:

    Type Description FeatureCollection

    The entries from the database table

    Source code in osm_merge/conflateBuildings.py
    def getDuplicates(self):\n    \"\"\"Get the entries from the duplicate building view.\n\n    Returns:\n        (FeatureCollection): The entries from the datbase table\n    \"\"\"\n    sql = \"SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view\"\n    result = self.db.queryDB(sql)\n    features = list()\n    for item in result:\n        # log.debug(item)\n        # First building identified\n        entry = {\"area\": float(item[0]), \"id\": int(item[1])}\n        geom = wkb.loads(item[2])\n        entry.update(item[3])\n        features.append(Feature(geometry=geom, properties=entry))\n\n        # Second building identified\n        entry = {\"area\": float(item[0]), \"id\": int(item[4])}\n        entry[\"id\"] = int(item[4])\n        geom = wkb.loads(item[5])\n        entry.update(item[6])\n        # FIXME: Merge the tags from the buildings into the OSM feature\n        # entry.update(item[3])\n        features.append(Feature(geometry=geom, properties=entry))\n\n    log.debug(f\"{len(features)} duplicate features found\")\n    return FeatureCollection(features)\n
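    A hedged sketch of the building-conflation workflow using the methods above; the database URIs and boundary file are hypothetical and assume the Underpass raw data schema:

        import geojson
        from osm_merge.conflateBuildings import ConflateBuildings

        boundary = geojson.load(open("aoi.geojson", "r"))["features"][0]["geometry"]
        cdb = ConflateBuildings("localhost/footprints", boundary)
        cdb.overlapDB("localhost/osm")     # build the osm_view and dups_view tables
        dups = cdb.getDuplicates()         # footprints that already exist in OSM
        cdb.cleanDuplicates()              # drop them from the import view
        new = cdb.getNew()                 # only the buildings missing from OSM
        print(len(dups["features"]), len(new["features"]))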
    "},{"location":"api/#conflatepoipy","title":"conflatePOI.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default dburi str

    The DB URI

    None boundary Polygon

    The AOI of the project

    None threshold int

    The distance in meters for distance calculations

    7

    Returns:

    Type Description ConflatePOI

    An instance of this object

    Source code in osm_merge/conflatePOI.py
    def __init__(self,\n             dburi: str = None,\n             boundary: Polygon = None,\n             threshold: int = 7,\n             ):\n    \"\"\"\n    This class conflates data that has been imported into a postgres\n    database using the Underpass raw data schema.\n\n    Args:\n        dburi (str): The DB URI\n        boundary (Polygon): The AOI of the project\n        threshold (int): The distance in meters for distance calculations\n\n    Returns:\n        (ConflatePOI): An instance of this object\n    \"\"\"\n    self.data = dict()\n    self.db = None\n    self.tolerance = threshold # Distance in meters for conflating with postgis\n    self.boundary = boundary\n    # Use a common select so it's consistent when parsing results\n    self.select = \"SELECT osm_id,tags,version,ST_AsText(geom),ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;%s\\'))\"\n    if dburi:\n        # for thread in range(0, cores + 1):\n        self.db = GeoSupport(dburi)\n        # self.db.append(db)\n        # We only need to clip the database into a new table once\n        if boundary:\n            self.db.clipDB(boundary, self.db.db)\n            self.db.clipDB(boundary, self.db.db, \"nodes_view\", \"nodes\")\n


    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.overlaps","title":"overlaps","text":"
    overlaps(feature)\n

    Conflate a POI against all the features in a GeoJson file

    Parameters:

    Name Type Description Default feature dict

    The feature to conflate

    required

    Returns:

    Type Description dict

    The modified feature

    Source code in osm_merge/conflatePOI.py
    def overlaps(self,\n            feature: dict,\n            ):\n    \"\"\"\n    Conflate a POI against all the features in a GeoJson file\n\n    Args:\n        feature (dict): The feature to conflate\n\n    Returns:\n        (dict):  The modified feature\n    \"\"\"\n    # Most smartphone GPS are 5-10m off most of the time, plus sometimes\n    # we're standing in front of an amenity and recording that location\n    # instead of in the building.\n    gps_accuracy = 10\n    # this is the treshold for fuzzy string matching\n    match_threshold = 80\n    # log.debug(f\"conflateFile({feature})\")\n    hits = False\n    data = dict()\n    geom = Point((float(feature[\"attrs\"][\"lon\"]), float(feature[\"attrs\"][\"lat\"])))\n    wkt = shape(geom)\n    for existing in self.data['features']:\n        id = int(existing['properties']['id'])\n        entry = shapely.from_geojson(str(existing))\n        if entry.geom_type != 'Point':\n            center = shapely.centroid(entry)\n        else:\n            center = entry\n            # dist = shapely.hausdorff_distance(center, wkt)\n            # if 'name' in existing['properties']:\n            #     print(f\"DIST1: {dist}, {existing['properties']['name']}\")\n        # x = shapely.distance(wkt, entry)\n        # haversine reverses the order of lat & lon from what shapely uses. We\n        # use this as meters is easier to deal with than cartesian coordinates.\n        x1 = (center.coords[0][1], center.coords[0][0])\n        x2 = (wkt.coords[0][1], wkt.coords[0][0])\n        dist = haversine(x1, x2, unit=Unit.METERS)\n        if dist < gps_accuracy:\n            # if 'name' in existing['properties']:\n            # log.debug(f\"DIST2: {dist}\")\n            # log.debug(f\"Got a Hit! {feature['tags']['name']}\")\n            for key,value in feature['tags'].items():\n                if key in self.analyze:\n                    if key in existing['properties']:\n                        result = fuzz.ratio(value, existing['properties'][key])\n                        if result > match_threshold:\n                            # log.debug(f\"Matched: {result}: {feature['tags']['name']}\")\n                            existing['properties']['fixme'] = \"Probably a duplicate!\"\n                            log.debug(f\"Got a dup in file!!! {existing['properties']['name'] }\")\n                            hits = True\n                            break\n        if hits:\n            version = int(existing['properties']['version'])\n            # coords = feature['geometry']['coordinates']\n            # lat = coords[1]\n            # lon = coords[0]\n            attrs = {'id': id, 'version': version, 'lat': feature['attrs']['lat'], 'lon': feature['attrs']['lon']}\n            tags = existing['properties']\n            tags['fixme'] = \"Probably a duplicate!\"\n            # Data extracts for ODK Collect\n            del tags['title']\n            del tags['label']\n            if 'building' in tags:\n                return {'attrs': attrs, 'tags': tags, 'refs': list()}\n            return {'attrs': attrs, 'tags': tags}\n    return dict()\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.queryToFeature","title":"queryToFeature","text":"
    queryToFeature(results)\n

    Convert the results of an SQL query into GeoJson Features

    Parameters:

    Name Type Description Default results list

    The results of the query

    required

    Returns:

    Type Description list

    A list of the features from the results

    Source code in osm_merge/conflatePOI.py
    def queryToFeature(self,\n                   results: list,\n                   ):\n    \"\"\"\n    Convert the results of an SQL to a GeoJson Feature\n\n    Args:\n        results (list): The results of the query\n\n    Returns:\n        (list): a list of the features fromn the results\n    \"\"\"\n\n    features = list()\n    for entry in results:\n        osm_id = int(entry[0])\n        tags = entry[1]\n        version = int(entry[2])\n        coords = shapely.from_wkt(entry[3])\n        dist = entry[4]\n        # ways have an additional column\n        if len(entry) == 6:\n            refs = entry[5]\n        else:\n            refs = list()\n        if coords.geom_type == 'Polygon':\n            center = shapely.centroid(coords)\n            lat = center.y\n            lon = center.x\n            tags['geom_type'] = 'way'\n        elif coords.geom_type == \"Point\":\n            lat = coords.y\n            lon = coords.x\n            tags['geom_type'] = 'node'\n        else:\n            log.error(f\"Unsupported geometry type: {coords.geom_type}\")\n        # match = entry[5] # FIXME: for debugging\n        # the timestamp attribute gets added when it's uploaded to OSM.\n        attrs = {'id': osm_id,\n                'version': version,\n                'lat': lat,\n                'lon': lon,\n                }\n        tags['dist'] = dist\n        # tags['match'] = match # FIXME: for debugging\n        # tags['fixme'] = \"Probably a duplicate node!\"\n        features.append({'attrs': attrs, 'tags': tags, 'refs': refs})\n\n    return features\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.checkTags","title":"checkTags","text":"
    checkTags(feature, osm)\n

    Check tags between 2 features.

    Parameters:

    Name Type Description Default feature Feature

    The feature from the external dataset

    required osm dict

    The result of the SQL query

    required

    Returns:

    Type Description int

    The number of tag matches

    dict

    The updated tags

    Source code in osm_merge/conflatePOI.py
    def checkTags(self,\n              feature: Feature,\n              osm: dict,\n              ):\n    \"\"\"\n    Check tags between 2 features.\n\n    Args:\n        feature (Feature): The feature from the external dataset\n        osm (dict): The result of the SQL query\n\n    Returns:\n        (int): The nunber of tag matches\n        (dict): The updated tags\n    \"\"\"\n    tags = osm['tags']\n    hits = 0\n    match_threshold = 80\n    if osm['tags']['dist'] > float(self.tolerance):\n        return 0, osm['tags']\n    for key, value in feature['tags'].items():\n        if key in tags:\n            ratio = fuzz.ratio(value, tags[key])\n            if ratio > match_threshold:\n                hits += 1\n            else:\n                if key != 'note':\n                    tags[f'old_{key}'] = value\n        tags[key] = value\n\n    return hits, tags\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.conflateData","title":"conflateData","text":"
    conflateData(data, threshold=7)\n

    Conflate all the data. This is the primary interface for conflation.

    Parameters:

    Name Type Description Default data list

    A list of all the entries in the OSM XML input file

    required threshold int

    The threshold for distance calculations

    7

    Returns:

    Type Description dict

    The modified features

    Source code in osm_merge/conflatePOI.py
    def conflateData(self,\n                 data: list,\n                 threshold: int = 7,\n                 ):\n    \"\"\"\n    Conflate all the data. This the primary interfacte for conflation.\n\n    Args:\n        data (list): A list of all the entries in the OSM XML input file\n        threshold (int): The threshold for distance calculations\n\n    Returns:\n        (dict):  The modified features\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n    # Use fuzzy string matching to handle minor issues in the name column,\n    # which is often used to match an amenity.\n    if len(self.data) == 0:\n        self.db.queryDB(\"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch\")\n    log.debug(f\"conflateData() called! {len(data)} features\")\n\n    # A chunk is a group of threads\n    entries = len(data)\n    chunk = round(len(data) / cores)\n\n    if True: # FIXME: entries <= chunk:\n        result = conflateThread(data, self)\n        timer.stop()\n        return result\n\n    # Chop the data into a subset for each thread\n    newdata = list()\n    future = None\n    result = None\n    index = 0\n    with concurrent.futures.ThreadPoolExecutor(max_workers=cores) as executor:\n        i = 0\n        subset = dict()\n        futures = list()\n        for key, value in data.items():\n            subset[key] = value\n            if i == chunk:\n                i = 0\n                result = executor.submit(conflateThread, subset, self)\n                index += 1\n                # result.add_done_callback(callback)\n                futures.append(result)\n                subset = dict()\n            i += 1\n        for future in concurrent.futures.as_completed(futures):\n            log.debug(f\"Waiting for thread to complete..\")\n            # print(f\"YYEESS!! {future.result(timeout=10)}\")\n            newdata.append(future.result(timeout=5))\n    timer.stop()\n    return newdata\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.queryWays","title":"queryWays","text":"
    queryWays(feature, db=None)\n

    Conflate a POI against all the ways in a postgres view

    Parameters:

    Name Type Description Default feature Feature

    The feature to conflate

    required db GeoSupport

    The database connection to use

    None

    Returns:

    Type Description list

    The data with tags added from the conflation

    Source code in osm_merge/conflatePOI.py
        def queryWays(self,\n                    feature: Feature,\n                    db: GeoSupport = None,\n                    ):\n        \"\"\"\n        Conflate a POI against all the ways in a postgres view\n\n        Args:\n            feature (Feature): The feature to conflate\n            db (GeoSupport): The datbase connection to use\n\n        Returns:\n            (list): The data with tags added from the conflation\n        \"\"\"\n        # log.debug(f\"conflateWay({feature})\")\n        hits = 0\n        result = list()\n        geom = Point((float(feature[\"attrs\"][\"lon\"]), float(feature[\"attrs\"][\"lat\"])))\n        wkt = shape(geom)\n\n        # cleanval = escape(value)\n        # Get all ways close to this feature.\n#        query = f\"SELECT osm_id,tags,version,ST_AsText(ST_Centroid(geom)),ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) FROM ways_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} ORDER BY ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\'))\"\n        query = f\"{self.select}\" % wkt.wkt\n        query += f\", refs FROM ways_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} ORDER BY ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\'))\"\n        #log.debug(query)\n        result = list()\n        if db:\n            result = db.queryDB(query)\n        else:\n            result = self.db.queryDB(query)\n        if len(result) > 0:\n            hits += 1\n        else:\n            log.warning(f\"No results at all for {query}\")\n\n        return result\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.queryNodes","title":"queryNodes","text":"
    queryNodes(feature, db=None)\n

    Find all the nodes in the view within a certain distance that are buildings or amenities.

    Parameters:

    Name Type Description Default feature Feature

    The feature to use as the location

    required db GeoSupport

    The database connection to use

    None

    Returns:

    Type Description list

    The results of the conflation

    Source code in osm_merge/conflatePOI.py
    def queryNodes(self,\n                 feature: Feature,\n                 db: GeoSupport = None,\n                 ):\n    \"\"\"\n    Find all the nodes in the view within a certain distance that\n    are buildings or amenities.\n\n    Args:\n        feature (Feature): The feature to use as the location\n        db (GeoSupport): The database connection to use\n\n    Returns:\n        (list): The results of the conflation\n    \"\"\"\n    # log.debug(f\"queryNodes({feature})\")\n    hits = 0\n    geom = Point((float(feature[\"attrs\"][\"lon\"]), float(feature[\"attrs\"][\"lat\"])))\n    wkt = shape(geom)\n    result = list()\n    ratio = 1\n\n    # for key,value in feature['tags'].items():\n    # print(f\"NODE: {key} = {value}\")\n    # if key not in self.analyze:\n    #     continue\n\n    # Use a Geography data type to get the answer in meters, which\n    # is easier to deal with than degress of the earth.\n    # cleanval = escape(value)\n    # query = f\"SELECT osm_id,tags,version,ST_AsEWKT(geom),ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')),levenshtein(tags->>'{key}', '{cleanval}') FROM nodes_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} AND levenshtein(tags->>'{key}', '{cleanval}') <= {ratio}\"\n    # AND (tags->>'amenity' IS NOT NULL OR tags->>'shop' IS NOT NULL)\"\n    query = f\"{self.select}\" % wkt.wkt\n    query += f\" FROM nodes_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} AND (tags->>'amenity' IS NOT NULL OR tags->>'building' IS NOT NULL)\"\n    #log.debug(query)\n    # FIXME: this currently only works with a local database,\n    # not underpass yet\n    if db:\n        result = db.queryDB(query)\n    else:\n        result = self.db.queryDB(query)\n    # log.debug(f\"Got {len(result)} results\")\n    if len(result) > 0:\n        hits += 1\n        # break\n    # else:\n    #     log.warning(f\"No results at all for {query}\")\n\n    return result\n
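    A hedged sketch of querying nearby nodes for a single POI; the URI and coordinates are hypothetical, and the feature uses the attrs/tags layout produced by the parsers above:

        from osm_merge.conflatePOI import ConflatePOI

        poi = ConflatePOI("localhost/osm")
        feature = {"attrs": {"lon": "-105.0", "lat": "40.0"},
                   "tags": {"amenity": "cafe", "name": "Trailhead Cafe"}}
        nearby = poi.queryNodes(feature)
        print(f"{len(nearby)} candidate nodes within {poi.tolerance}m")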
    "},{"location":"api/#geosupportpy","title":"geosupport.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    None

    Returns:

    Type Description GeoSupport

    An instance of this object

    Source code in osm_merge/geosupport.py
    def __init__(self,\n             dburi: str = None,\n             config: str = None,\n             ):\n    \"\"\"\n    This class conflates data that has been imported into a postgres\n    database using the Underpass raw data schema.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n\n    Returns:\n        (GeoSupport): An instance of this object\n    \"\"\"\n    self.db = None\n    self.dburi = dburi\n    self.config = config\n


    "},{"location":"api/#osm_merge.geosupport.GeoSupport.importDataset","title":"importDataset async","text":"
    importDataset(filespec)\n

    Import a GeoJson file into a postgres database for conflation.

    Parameters:

    Name Type Description Default filespec str

    The GeoJson file to import

    required

    Returns:

    Type Description bool

    If the import was successful

    Source code in osm_merge/geosupport.py
    async def importDataset(self,\n                 filespec: str,\n                 ) -> bool:\n    \"\"\"\n    Import a GeoJson file into a postgres database for conflation.\n\n    Args:\n        filespec (str): The GeoJson file to import\n\n    Returns:\n        (bool): If the import was successful\n    \"\"\"\n    file = open(filespec, \"r\")\n    data = geojson.load(file)\n\n    # Create the tables\n    sql = \"CREATE EXTENSION postgis;\"\n    result = await self.db.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS public.nodes CASCADE; CREATE TABLE public.nodes (osm_id bigint, geom geometry, tags jsonb);\"\n    result = await self.db.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS public.ways_line CASCADE; CREATE TABLE public.ways_line (osm_id bigint, geom geometry, tags jsonb);\"\n    result = await self.db.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS public.poly CASCADE; CREATE TABLE public.ways_poly (osm_id bigint, geom geometry, tags jsonb);\"\n    result = await self.db.execute(sql)\n\n    # if self.db.is_closed():\n    #     return False\n\n    table = self.dburi.split('/')[1]\n    for entry in data[\"features\"]:\n        keys = \"geom, \"\n        geometry = shape(entry[\"geometry\"])\n        ewkt = geometry.wkt\n        if geometry.geom_type == \"LineString\":\n            table = \"ways_line\"\n        if geometry.geom_type == \"Polygon\":\n            table = \"ways_poly\"\n        if geometry.geom_type == \"Point\":\n            table = \"nodes\"\n        tags = f\"\\'{{\"\n        for key, value in entry[\"properties\"].items():\n            tags += f\"\\\"{key}\\\": \\\"{value}\\\", \"\n        tags = tags[:-2]\n        tags += \"}\\'::jsonb)\"\n        sql = f\"INSERT INTO {table} (geom, tags) VALUES(ST_GeomFromEWKT(\\'SRID=4326;{ewkt}\\'), {tags}\"\n        result = await self.db.pg.execute(sql)\n\n    return False\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.initialize","title":"initialize async","text":"
    initialize(dburi=None, config=None)\n

    When running async, we can't initialize the async database connection in the constructor, so it has to be done as an extra step.

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    None Source code in osm_merge/geosupport.py
    async def initialize(self,\n                    dburi: str = None,\n                    config: str = None,\n                    ):\n    \"\"\"\n    When async, we can't initialize the async database connection,\n    so it has to be done as an extrat step.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n    \"\"\"\n    if dburi:\n        self.db = PostgresClient()\n        await self.db.connect(dburi)\n    elif self.dburi:\n        self.db = PostgresClient()\n        await self.db.connect(self.dburi)\n\n    if config:\n        await self.db.loadConfig(config)\n    elif self.config:\n        await self.db.loadConfig(config)\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.clipDB","title":"clipDB async","text":"
    clipDB(boundary, db=None, view='ways_view')\n

    Clip a database table by a boundary

    Parameters:

    Name Type Description Default boundary Polygon

    The AOI of the project

    required db PostgresClient

    A reference to the existing database connection

    None view str

    The name of the new view

    'ways_view'

    Returns:

    Type Description bool

    If the region was clipped successfully

    Source code in osm_merge/geosupport.py
    async def clipDB(self,\n         boundary: Polygon,\n         db: PostgresClient = None,\n         view: str = \"ways_view\",\n         ):\n    \"\"\"\n    Clip a database table by a boundary\n\n    Args:\n        boundary (Polygon): The AOI of the project\n        db (PostgresClient): A reference to the existing database connection\n        view (str): The name of the new view\n\n    Returns:\n        (bool): If the region was clipped sucessfully\n    \"\"\"\n    remove = list()\n    if not boundary:\n        return False\n\n    ewkt = shape(boundary)\n\n    # Create a new postgres view\n    # FIXME: this should be a temp view in the future, this is to make\n    # debugging easier.\n    sql = f\"DROP VIEW IF EXISTS {view} CASCADE ;CREATE VIEW {view} AS SELECT * FROM ways_poly WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom)\"\n    # log.debug(sql)\n    if db:\n        result = await db.queryDB(sql)\n    elif self.db:\n        result = await self.db.queryDBl(sql)\n    else:\n        return False\n\n    return True\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.queryDB","title":"queryDB async","text":"
    queryDB(sql=None, db=None)\n

    Query a database table

    Parameters:

    Name Type Description Default db PostgreClient

    A reference to the existing database connection

    None sql str

    The SQL query to execute

    None

    Returns:

    Type Description list

    The results of the query

    Source code in osm_merge/geosupport.py
    async def queryDB(self,\n            sql: str = None,\n            db: PostgresClient = None,\n            ) -> list:\n    \"\"\"\n    Query a database table\n\n    Args:\n        db (PostgreClient, optional): A reference to the existing database connection\n        sql (str): The SQL query to execute\n\n    Returns:\n        (list): The results of the query\n    \"\"\"\n    result = list()\n    if not sql:\n        log.error(f\"You need to pass a valid SQL string!\")\n        return result\n\n    if db:\n        result = db.queryLocal(sql)\n    elif self.db:\n        result = self.db.queryLocal(sql)\n\n    return result\n
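    A hedged sketch combining initialize() and queryDB(); the URI and SQL assume a local database using the Underpass raw data schema:

        import asyncio
        from osm_merge.geosupport import GeoSupport

        async def main():
            gs = GeoSupport("localhost/osm")
            await gs.initialize()
            rows = await gs.queryDB("SELECT osm_id, tags FROM ways_poly LIMIT 10")
            print(f"{len(rows)} rows returned")

        asyncio.run(main())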
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.clipFile","title":"clipFile async","text":"
    clipFile(boundary, data)\n

    Clip a FeatureCollection by a boundary

    Parameters:

    Name Type Description Default boundary Polygon

    The AOI of the project

    required data FeatureCollection

    The data to clip

    required

    Returns:

    Type Description FeatureCollection

    The data within the boundary

    Source code in osm_merge/geosupport.py
    async def clipFile(self,\n            boundary: Polygon,\n            data: FeatureCollection,\n            ):\n    \"\"\"\n    Clip a database table by a boundary\n\n    Args:\n        boundary (Polygon): The filespec of the project AOI\n        data (FeatureCollection): The data to clip\n\n    Returns:\n        (FeatureCollection): The data within the boundary\n    \"\"\"\n    new = list()\n    aoi = shape(boundary)\n    if len(self.data) > 0:\n        for feature in self.data[\"features\"]:\n            geom = shape(feature[\"geometry\"])\n            # Only keep features that fall within the project AOI\n            if shapely.contains(aoi, geom):\n                new.append(feature)\n\n    return new\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.copyTable","title":"copyTable async","text":"
    copyTable(table, remote)\n

    Use DBLINK to copy a table from the external database to a local table so conflating is much faster.

    Parameters:

    table (str): The table to copy. Required.

    Source code in osm_merge/geosupport.py
    async def copyTable(self,\n                    table: str,\n                    remote: PostgresClient,\n                    ):\n    \"\"\"\n    Use DBLINK to copy a table from the external\n    database to a local table so conflating is much faster.\n\n    Args:\n        table (str): The table to copy\n    \"\"\"\n    timer = Timer(initial_text=f\"Copying {table}...\",\n                  text=\"copying {table} took {seconds:.0f}s\",\n                  logger=log.debug,\n                )\n    # Get the columns from the remote database table\n    self.columns = await remote.getColumns(table)\n\n    print(f\"SELF: {self.pg.dburi}\")\n    print(f\"REMOTE: {remote.dburi}\")\n\n    # Do we already have a local copy ?\n    sql = f\"SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename  = '{table}'\"\n    result = await self.pg.execute(sql)\n    print(result)\n\n    # cleanup old temporary tables in the current database\n    # drop = [\"DROP TABLE IF EXISTS users_bak\",\n    #         \"DROP TABLE IF EXISTS user_interests\",\n    #         \"DROP TABLE IF EXISTS foo\"]\n    # result = await pg.pg.executemany(drop)\n    sql = f\"DROP TABLE IF EXISTS new_{table} CASCADE\"\n    result = await self.pg.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS {table}_bak CASCADE\"\n    result = await self.pg.execute(sql)\n    timer.start()\n    dbuser = self.pg.dburi[\"dbuser\"]\n    dbpass = self.pg.dburi[\"dbpass\"]\n    sql = f\"CREATE SERVER IF NOT EXISTS pg_rep_db FOREIGN DATA WRAPPER dblink_fdw  OPTIONS (dbname 'tm4');\"\n    data = await self.pg.execute(sql)\n\n    sql = f\"CREATE USER MAPPING IF NOT EXISTS FOR {dbuser} SERVER pg_rep_db OPTIONS ( user '{dbuser}', password '{dbpass}');\"\n    result = await self.pg.execute(sql)\n\n    # Copy table from remote database so JOIN is faster when it's in the\n    # same database\n    #columns = await sel.getColumns(table)\n    log.warning(f\"Copying a remote table is slow, but faster than remote access......\")\n    sql = f\"SELECT * INTO {table} FROM dblink('pg_rep_db','SELECT * FROM {table}') AS {table}({self.columns})\"\n    print(sql)\n    result = await self.pg.execute(sql)\n\n    return True\n
    "},{"location":"calculations/","title":"Conflation Calculations","text":"

    Part of the fun of external datasets, especially ones that have been around a long time like the MVUM data, is the variety of inconsistencies in the data. While OpenStreetMap itself is a bit overly flexible at times, so is external data. And some of the old data has been converted from other formats several times, with bugs getting introduced each time.

    "},{"location":"calculations/#geometries","title":"Geometries","text":"

    OpenStreetMap has relations, which are a collection of references to other features. External data may have LineStrings, MultiLineStrings, or a GeometryCollection, all in the same file! For all calculations the MultiLineStrings and GeometryCollections are taken apart, so the calculations are between the OSM data and that segment of the external data. Since this may produce multiple values, those need to be evaluated and the most likely one returned.

    "},{"location":"calculations/#distance","title":"Distance","text":"

    A simple distance calculation is performed after transforming the coordinate system from global degrees to meters. The result is compared to a threshold distance, and any feature within that threshold is added to a list of possible matches. Once a few features are found within the required distance, matching stops and the next feature to be conflated goes through the same process.

    If the highway is a GeometryCollection or MultiLineString, then it's split into segments, and each one is checked for distance. The closest one is what is returned.
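    As a rough illustration of the distance check, here is a minimal sketch using shapely and pyproj. The function name, the projection, and the 2 meter threshold are assumptions for the example, not the exact values used in conflator.py.

    import pyproj
    from shapely.geometry import LineString
    from shapely.ops import transform

    # Hypothetical threshold in meters; the real value is a tuneable parameter.
    THRESHOLD = 2.0

    # Reproject from WGS84 degrees to a metric CRS so distances are in meters.
    # A local UTM zone would be more accurate than web mercator over large areas.
    project = pyproj.Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True).transform

    def within_threshold(osm_line: LineString, ext_line: LineString) -> bool:
        """Return True if the two geometries are within the distance threshold."""
        dist = transform(project, osm_line).distance(transform(project, ext_line))
        return dist <= THRESHOLD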

    "},{"location":"calculations/#slope-and-angle","title":"Slope and Angle","text":"

    The distance check will often return features that are close to each other, but they are frequently spur roads off the more major one. So when two highway segments are found close to each other, the angle between them is calculated. This works well to differentiate between the more major highway and the spur road that splits off from it.

    If the highway is a GeometryCollection or MultiLineString, then it's split into segments, and each one is checked for the angle. The one with the smallest angle is returned.
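    A minimal sketch of the angle check, assuming the overall bearing of each segment is enough to tell a spur road from a parallel copy of the same road; the function names here are made up.

    import math
    from shapely.geometry import LineString

    def bearing(line: LineString) -> float:
        """Bearing in degrees from the first coordinate of a line to its last."""
        (x1, y1), (x2, y2) = line.coords[0], line.coords[-1]
        return math.degrees(math.atan2(y2 - y1, x2 - x1)) % 360

    def angle_between(osm_line: LineString, ext_line: LineString) -> float:
        """Smallest angle between the two directions, in the range 0-90 degrees."""
        diff = abs(bearing(osm_line) - bearing(ext_line)) % 180
        return min(diff, 180 - diff)

    # Two copies of the same road are nearly parallel (angle close to zero),
    # while a spur road usually meets the main road at a much larger angle.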

    "},{"location":"calculations/#tag-checking","title":"Tag Checking","text":"

    Once there is at least one candidate within the parameters of distance and angle, the tags are checked for matches. The tags we are primarily interested in are the name(s) and reference number(s) of each MVUM road or trail. Some of the existing features in OpenStreetMap may be inaccurate as to the proper name and reference. And of course each feature may have an alt_name or both a ref and a ref:usfs. Due to the wonders of inconsistent data, a fuzzy string comparison is done. This handles most of the basic issues, like capitalization, one or two character differences, etc... Anything above the threshold is considered a probable match, and increments a counter. This value is included in the conflated results, and is often between 1-3.

    The reference numbers between the two datasets are also compared. There is often a reference number in OSM already, but no name. The external dataset has the name, so we want to update OSM with that. In addition, the external datasets often have access information: seasonal access, private land, or different types of vehicles, which can be added to OSM.
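    As a sketch of the fuzzy name comparison, using only the Python standard library; the actual conflator may use a different fuzzy matcher and a different threshold.

    from difflib import SequenceMatcher

    def names_match(osm_name: str, ext_name: str, threshold: float = 0.85) -> bool:
        """Case-insensitive fuzzy comparison of two highway names."""
        ratio = SequenceMatcher(None, osm_name.lower(), ext_name.lower()).ratio()
        return ratio >= threshold

    # names_match("Cedar Lake Rd", "Cedar Lake Road") is True, so capitalization
    # and one or two character differences don't block a match.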

    "},{"location":"calculations/#tag-merging","title":"Tag Merging","text":"

    The conflation process for merging tags uses the concept of primary and secondary datasets. The primary is considered to have the true value for a highway or trail. For example, if the name in the two datasets doesn't match, the current value from the secondary is renamed to old_name and the primary's version becomes the name. The same applies to reference numbers.

    Other tags from the primary can also be merged, overriding what is currently in OSM. Once again, the old values are renamed, not deleted. When validating in JOSM, you can see both versions and make a final determination as to what is the correct value. Often it's just spelling differences.

    For all the features in OSM that only have a highway=something as a tag, all the desired tags from the primary dataset are added.

    For some tags like surface and smoothness, the value in OSM is potentially more recent, so those are not updated. For any highway feature lacking those tags, they get added.

    Optionally the various access tags for private, atv, horse, motorcycle, etc... are set in the post conflation dataset if they have a value in the external dataset.
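    A minimal sketch of the merging rule described above; the helper name and the exact list of tags that are never overwritten are assumptions for the example.

    def merge_tags(primary: dict, secondary: dict) -> dict:
        """Merge tags, keeping the primary value and renaming conflicts to old_*."""
        merged = dict(secondary)
        for key, value in primary.items():
            if key in ("surface", "smoothness", "tracktype"):
                # OSM may be more recent for these, so only fill them in when missing.
                merged.setdefault(key, value)
                continue
            if key in merged and merged[key] != value:
                # Keep the old value so the validator can compare both in JOSM.
                merged[f"old_{key}"] = merged[key]
            merged[key] = value
        return merged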

    "},{"location":"calculations/#debug-tags","title":"Debug Tags","text":"

    Currently a few tags are added to each feature to aid in validating and debugging the conflation process. These should obviously be removed before uploading to OSM. They'll be removed at a future date after more validation. These are:

    • hits - The number of matching tags in a feature
    • ratio - The ratio for name matching if not 100%
    • dist - The distance between features
    • angle - The angle between two features
    • slope - The slope between two features
    "},{"location":"calculations/#issues","title":"Issues","text":"

    Conflation is never 100% accurate due to the wonderful um... \"flexibility\" of the datasets. Minor tweaks to the steering parameters for the distance, angle, and fuzzy string matching can produce slightly different results. I often run the same datasets with different parameters looking for the best results.

    "},{"location":"calculations/#clipping","title":"Clipping","text":"

    Where a feature crosses the task boundary, the calculations have to deal with incomplete features, which is messy. This is particularly a problem when conflating small datasets.

    "},{"location":"conflation/","title":"Conflating External Datasets","text":"

    This project is the merging of several programs for conflating external datasets with OpenStreetMap data developed at HOT. These were originally developed for large scale building imports using MS Footprints in East Africa, and to also work with conflating data collected with OpenDataKit for the Field Mapping Tasking Manager project.

    "},{"location":"conflation/#the-data-files","title":"The Data Files","text":"

    While any name can be used for the OSM database, I usually default to naming the OpenStreetMap database the country name as used in the data file. Other datasets have their own schema, and can be imported with ogr2ogr, or using python to write a custom importer. In that case I name the database after the dataset source. Past versions of this program could conflate between multiple datasets, so it's good to keep things clear.

    "},{"location":"conflation/#overture-data","title":"Overture Data","text":"

    The Overture Foundation (https://www.overturemaps.org) has been recently formed to build a competitor to Google Maps. The plan is to use OpenStreetMap (OSM) data as a baselayer, and layer other datasets on top. The currently available data (July 2023) has 13 different datasets in addition to the OSM data. It is available here. It also includes a snapshot of OSM data from the same time frame. Other than the OSM data and MS Footprints, all the current additional data is primarily US specific, and often contains multiple copies of the same dataset, but from different organizations.

    The osm-rawdata python module has a utility that'll import the Parquet data files into the postgres database schema used by multiple projects at HOT. That schema is designed for data analysis, unlike the standard OSM database schema. There is more detail in these notes I've written about importing Overture Data into postgres.

    "},{"location":"conflation/#duplicate-buildings","title":"Duplicate Buildings","text":"

    This is the primary conflation task. Because of offsets in the satellite imagery used for the original buildings, there is rarely an exact duplicate, only similar ones. The only time you see an exact duplicate is when the same source data is in multiple other datasets. The orientation may be different even if the building is roughly the same size, or it may be roughly in the same position but a different size. Several checks are made to determine duplicates. First is to check for any intersection of the two polygons. If the two polygons intersect, it's an overlapping building or possibly a duplicate. Any building in the footprint data that is found to be a duplicate is removed from the output data file.
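    A minimal sketch of the intersection test, assuming both features are GeoJson dictionaries; the real code does further checks beyond a simple intersection.

    from shapely.geometry import shape

    def is_duplicate(osm_feature: dict, footprint: dict) -> bool:
        """Treat a footprint building as a possible duplicate if it intersects an OSM building."""
        return shape(osm_feature["geometry"]).intersects(shape(footprint["geometry"]))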

    "},{"location":"conflation/#overlapping-buildings","title":"Overlapping Buildings","text":"

    It is entirely possible that a new building in the footprints data may overlap with an existing building in OSM. It wouldn't be overlapping in the footprints data. Since this requires human intervention to fix, these buildings are left in the output data, but flagged with a debugging tag of overlapping=yes. There are also many occurrences where the building being imported has a better building geometry than OSM, so the best one should be selected.

    Using the HOT Underpass project, it is possible to scan the building geometries and either delete the bad geometry one, or flag it in the result data files for a human to validate the results.

    "},{"location":"conflation/#known-problems","title":"Known Problems","text":"

    There are two main issues with ML/AI derived building footprints. Buildings that are very close together, like the business section in many areas of the world, do not get marked as separate buildings. Instead the entire block of buildings is a single polygon. This will eventually get fixed by drone mapping, where there can be more of a street view of the buildings that you can't get using existing satellite imagery.

    The other problem is that when processing satellite imagery, buildings are recognized by shading differences, so features are often flagged as buildings that don't actually exist. For example, big rocks in the desert or haystacks in a field both get marked as a building. Any building in the footprints data that has no other buildings nearby, nor a highway or path of some kind, is flagged with a debugging tag of false=yes. Usually this is easy to determine by looking at satellite imagery, since these are often remote buildings. The tags can be searched for when editing the data to visually determine whether it's a real building or not.

    "},{"location":"conflation/#conflating-other-than-buildings","title":"Conflating Other Than Buildings","text":""},{"location":"conflation/#opendatakit","title":"OpenDataKit","text":"

    Data collected in the field using ODK Collect is a specific case. If using data extracts from OpenStreetMap, the data extract has the OSM ID, so it's much simpler to conflate the new tags with either the existing building polygon or POI. For this workflow, any tag in the feature from ODK will overwrite any existing values in the existing feature. This allows for updating the tags & values when ground-truthing. When the OSM XML file is loaded into JOSM, it has the modified attribute set, and the version has been incremented. In JOSM under the File menu, select the Update Modified menu item. This will sync the modified feature with current OSM. At that point all that needs to be done is validate the modified features, and upload to OSM.

    When ODK Collect is used but has no data extract, conflation is more complicated. For this use case, a more brute force algorithm is used. Initially any building polygon or POI within 7 meters is found by querying the database. Most smartphone GPS chipsets, even on high-end phones, are between 4-9m off from your actual location. That value was derived by looking at lots of data, and can be changed when invoking the conflation software in this project. Once nearby buildings are identified, then the tags are compared to see if there is a match.
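    A sketch of the kind of proximity query this implies, using PostGIS; the table and column names here are assumptions and may not match the osm-rawdata schema exactly.

    def nearby_buildings(db, lon: float, lat: float, meters: float = 7.0) -> list:
        """Find building polygons or POIs within a GPS-accuracy radius of a point."""
        sql = f"""SELECT osm_id, tags FROM ways_poly
                  WHERE tags->>'building' IS NOT NULL
                  AND ST_DWithin(geom::geography,
                        ST_SetSRID(ST_MakePoint({lon}, {lat}), 4326)::geography,
                        {meters})"""
        return db.queryLocal(sql)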

    For example, if collecting data on a restaurant, it may have a new name, but if the nearby building is the only one with an amenity=restaurant (or cafe, pub, etc...) it's considered a probable match. If there are multiple restaurants this doesn't work very well unless the name hasn't changed. If there are multiple possible features, a fixme= tag is added to the POI, and it has to be validated manually later. Every tag in the ODK data has to be compared with the nearby buildings. Often it's the name tag that is used for many amenities.

    If a satellite imagery basemap is used in Collect, conflation is somewhat simpler. If the mapper has selected the center of the building using the basemap, conflation starts by checking for the building polygon in OSM that contains this location. If no building is found, the POI is added to the output file with a fixme=new building tag so the building can be traced by the validator. Any tags from the POI are added to the new building polygon.

    "},{"location":"conflation/#points-of-interest-poi","title":"Points Of Interest (POI)","text":"

    It is common when collecting datasets from non-OSM sources that each feature may only be a single node. This may be a list of schools, businesses, etc... with additional information in each POI that can be added to the OSM building polygon (if it exists). Obviously any imported data must have a license acceptable for importing into OSM.

    Similar to conflating ODK data without a data extract, the tags & values are compared with any nearby building. Since these imports are often features already in OSM with limited metadata, this adds more detail.

    "},{"location":"conflation/#highways","title":"Highways","text":"

    Highways are more complex because OSM uses relations. A relation groups highway segments into a single entity. Sometimes the tags are on the relation, other times on each highway segment. The segments change when the highway condition changes, but the name and reference number don't change. External datasets don't use relations, they are OSM specific.

    "},{"location":"conflation/#mvum-highways","title":"MVUM Highways","text":"

    The USDA publishes a dataset of Motor Vehicle Use Maps (MVUM) highways in the National Forest. Some of this data has already been imported into OSM, although the metadata may be lacking, but the LineString is there. MVUM roads are primarily compacted dirt roads. While some can be driven in a passenger vehicle, most are varying degrees of bad to horrible to impassable. These highways are often used for recreational traffic by off-road vehicles, or for emergency access for a wildland fire or backcountry rescue.

    Another key detail of MVUM highways is each one may have 4 names! There is of course the primary name, for example \"Cedar Lake Road\". But it may also have a locals' name, common in remote areas. And then there are the reference numbers. A MVUM highway may have two reference numbers, the county designated one and the USDA one. Luckily OSM supports this. Many of these tags affect both how the highway is displayed, as well as routing for navigation.

    \"name\": \"Platte Lake Road\",\n\"alt_name\": \"Bar-K Ranch Road\",\n\"surface\": \"dirt\",\n\"smoothness\": \"bad\",\n\"highway\": \"track\",\n\"ref\": \"CO 112\",\n\"ref:usfs\": \"FR 521.1A\"\n\"tracktype\": \"grade3\"\n

    A bad highway is something I'd be comfortable driving in a 4x4 high-clearance vehicle. Smoothness values can be a bit misleading, as often what is in OSM may be years out of date. And most MVUM roads get zero maintenance, so they become eroded, pot-holed, and full of exposed rocks. And people's perception of road conditions is subjective, based on one's experience driving these highways.

    All of this metadata makes conflation interesting. Since existing OSM features were added by more than one person, the tagging may not be consistent. For example, the existing data may have Forest Service Road 123, which should really be ref:usfs=FR 123. And the real highway name Piney Pass Road is in the MVUM dataset. The goal of highway conflation is to merge the new metadata into the existing OSM feature where possible. This then needs to be validated by a human being. There is still much tedious work to process post conflation data before it can be uploaded to OSM.

    But sometimes conflation works well, especially when the LineString in OSM was imported from older versions of the MVUM data. But often highways in OSM were traced off satellite imagery, and may have wildly different geometry.

    If you ignore conflating tags other than name or ref, the process is somewhat less messy. And tags like surface and smoothness really should be ground-truthed anyway. So I ignore those for now and stick to validating the name and the two reference numbers, which are usually lacking in OSM. That, and adding consistency to the data to make it easier to make data extracts.

    To conflate OSM highways with external data, initially each entry in the external dataset does a distance comparison with the existing OSM data. There is an optional threshold to set the distance limit. Since currently this is focused on conflating files without a database, this is computationally intensive, so slow. For data that was imported in the past from MVUM datasets, a distance of zero means it's probably the same segment. The external dataset needs to have the tagging converted to the syntax OSM uses. Tagging can be adjusted using a conversion program, but as conversion is usually a one-off task, it can also be done using JOSM or QGIS. Usually it's a matter of deleting most of the tags in the external dataset that aren't appropriate for OSM. Primarily the only tags that are needed are the name and any reference numbers. Since the MVUM data also classifies the type of road surface, this can also be converted. Although as mentioned, it may be drastically out of date, and OSM is more recent and ground-truthed.

    Then there is a comparison of the road names. It's assumed the one from the MVUM dataset is the correct one. And since typos and weird abbreviations may exist in the datasets, fuzzy string matching is performed. This way names like FS 123.1 can match FR 123.1A. In this case the current name value in OSM becomes alt_name, and the MVUM name becomes the official name. This way when validating you can make decisions where there is confusion on what is correct. For an exact name match no other tags are checked to save a little time.

    Any other processing is going to be MVUM highway specific, so there will be an additional step to work through the reference numbers not supported by this program.

    "},{"location":"conflation/#output-files","title":"Output Files","text":"

    If the data files are huge, it's necessary to conflate with a subset of all the data. For projects using the Tasking Manager or the Field Mapping Tasking Manager you can download the project boundary file and use that. For other projects you can extract administrative boundaries from OpenStreetMap, or use external sources. Usually county administrative boundaries are a good size. These can be extracted from OSM itself, or an external data file of boundaries.

    After conflation, an output file is created with the new buildings that are not duplicates of existing OSM data. This is much smaller than the original data, but still too large for anyone having bandwidth issues. This output file is in GeoJson format, so can be edited with JOSM or QGIS

    Since this software is under development, rather than automatically deleting features, it adds tags to the features. Then when editing the data, it's possible to see the flagged data and validate the conflation. It also makes it possible to delete manually the results of the conflation from the output file once satisfied about the validation of the results.

    "},{"location":"conflation/#validating-the-conflation","title":"Validating The Conflation","text":"

    The conflated data file can't be uploaded to OSM until it is validated. While QGIS can be used for this purpose, JOSM is preferred because it does validation checks, and uploads directly to OpenStreetMap. I start by loading the conflation data file, and then enabling the OpenStreetMap imagery for the basemap. Existing buildings in OSM are grey polygons, so it's possible to see existing buildings with the conflated new buildings as a layer on top.

    Once the buildings are loaded, you can then download the OSM data for that view. Then use the SelectDuplicateBuilding script to find any buildings that have been added since the initial data file for conflation was used. Once selected, those can be deleted in a single operation.

    The next step is validating what is left that is considered to be a new building. This is done using satellite imagery. Most commercial satellite imagery available for public use comes from Maxar. But the different providers (Bing, ESRI, Google, etc...) have different update cycles, so I often double check with ESRI imagery.

    If there is drone imagery available from Open Aerial Map, that's also a good source of imagery, but often doesn't cover a large area.

    "},{"location":"formats/","title":"File Formats","text":"

    This project supports two file formats, GeoJson and OSM XML.

    "},{"location":"formats/#geojson","title":"GeoJson","text":"

    GeoJson is widely supported by many tools, and this project uses it as the internal data structure for consistency. At the top level the file starts with a FeatureCollection, which is just a container for the list of features.

    "},{"location":"formats/#geometry","title":"Geometry","text":"

    Each GeoJson feature contains a geometry object that has two fields, the coordinates, and the type. Shapely or GDAL can be used to convert between string representations and geometry objects.
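    For example, with shapely the conversion in both directions is a one-liner; this is just an illustration, not a quote from the project code.

    from shapely.geometry import mapping, shape

    feature = {"type": "Feature",
               "properties": {"highway": "track"},
               "geometry": {"type": "LineString",
                            "coordinates": [[-105.10, 40.00], [-105.20, 40.10]]}}

    geom = shape(feature["geometry"])    # GeoJson dict -> shapely geometry object
    feature["geometry"] = mapping(geom)  # shapely geometry -> GeoJson dict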

    "},{"location":"formats/#properties","title":"Properties","text":"

    The properties section holds the keyword=value pairs, similar to the tags in OSM. There is no defined schema, any pair works. For conflation though, standardizing on the OSM schema for tagging is critical to keep things simple.

    \"properties\": {\n    \"ref:usfs\": \"FR 965.2\",\n    \"name\": \"  Road\",\n    \"4wd_only\": \"yes\",\n    \"seasonal\": \"yes\"\n},\n
    "},{"location":"formats/#osm-xml","title":"OSM XML","text":"

    An OSM XML file is read and converted to GeoJson, and then later it can get converted to OSM XML for the output file. In addition to the tags and geometry, each feature also has attributes.

    "},{"location":"formats/#attributes","title":"Attributes","text":"

    The OSM XML format has attributes, which are used to control editing a feature. Since this project wants to generate an OSM XML file for JOSM that allows for tag merging, these attributes are important. In the post conflation data file, the version of the existing OSM feature has been incremented, and the action is set to modify. This enables JOSM to see this as an edited feature so it can be uploaded.

    • id - the OSM ID of the feature
    • version - the current version of the feature
    • action - the action to apply when uploading to OSM
      • create
      • modify
      • delete
    • timestamp - the timestamp of the feature's last change

    With action=modify set, in JOSM you can update modified and sync with current OSM.
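    As a sketch of what that attribute handling amounts to, using the standard library rather than the OsmFile() class the project actually builds on:

    import xml.etree.ElementTree as ET

    way = ET.fromstring('<way id="10109556" version="4" timestamp="2021-06-12T16:25:43Z"/>')

    # Mark the feature as edited so JOSM treats it as modified and uploadable.
    way.set("version", str(int(way.get("version")) + 1))
    way.set("action", "modify")

    print(ET.tostring(way, encoding="unicode"))
    # <way id="10109556" version="5" timestamp="2021-06-12T16:25:43Z" action="modify" />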

    "},{"location":"formats/#data-types","title":"Data Types","text":"

    There are two data types in the OSM XML files used for conflation. These are nodes and ways.

    "},{"location":"formats/#nodes","title":"Nodes","text":"

    A node is a single coordinate. This is often used as a POI, and will have tags. A node that is referenced in a way won't have any tags, just the coordinates. The version and timestamp get updated if there is a change to the node location.

    <node id=\"83276871\" version=\"3\"\n    timestamp=\"2021-06-12T16:25:43Z\" lat=\"37.6064731\" lon=\"-114.00674\"/>\n
    "},{"location":"formats/#ways","title":"Ways","text":"

    A way can be a linestring, polygon, or any geometry that includes more than one node. This makes it difficult to do spatial comparisons, so when an OSM XML file is loaded, in addition to the refs, they are also converted to an actual geometry. All the calculations use the geometry, and the refs are used to construct the OSM XML output file for JOSM. OSM has no concept of a LineString or Polygon; the shape is determined by the tags, for example highway=track, or building=yes.

    <way id=\"10109556\" version=\"4\" timestamp=\"2021-06-12T15:42:25Z\">\n<nd ref=\"83305252\"/>\n<nd ref=\"8118009676\"/>\n<nd ref=\"8118009677\"/>\n<nd ref=\"83277113\"/>\n<nd ref=\"83277114\"/>\n<nd ref=\"83277116\"/>\n<nd ref=\"83277117\"/>\n<tag k=\"highway\" v=\"unclassified\"/>\n<tag k=\"surface\" v=\"dirt\"/>\n</way>\n
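    A minimal sketch of turning the refs of a way into a geometry, assuming a node cache built while reading the node elements; the coordinates here are made up.

    from shapely.geometry import LineString

    # Hypothetical cache mapping node id -> (lon, lat), filled while parsing <node> elements.
    node_cache = {83305252: (-114.0067, 37.6065), 8118009676: (-114.0070, 37.6070)}
    refs = [83305252, 8118009676]

    # The refs are kept for writing OSM XML back out; the geometry is what the
    # distance and angle calculations actually use.
    geometry = LineString([node_cache[ref] for ref in refs])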

    "},{"location":"formats/#converting-between-formats","title":"Converting Between Formats","text":"

    To support reading and writing OSM XML files, this project has its own code that builds on top of the OsmFile() class in OSM Fieldwork. This parses the OSM XML file into GeoJson format for internal use. All of the attributes in the OSM XML file being read are converted to tags in the GeoJson properties section, and then later converted from the properties back to OSM XML attributes when writing the output file.

    "},{"location":"highways/","title":"Conflating Highway and Trail Data","text":"

    This is focused only on highway and trail data in the US, but should be useful for other countries. In particular, this is focused on the primary goal of improving OpenStreetMap data in remote areas as these are used for emergency response. Most of these roads and trails are in OSM already, some from past imports, some traced off of satellite imagery.

    I did a talk at SOTM-US in Tucson about this project called OSM For Fire Fighting. This conflation software was developed to improve the quality of the remote highway data in OpenStreetMap. This is not an import of new data, only updating existing features with a focus on improved navigation. Importing new features from these datasets uses a different process, so it's better to not mix the two.

    While there are details in the datasets that would be useful, the initial set is the name, the reference number, and the vehicle class appropriate for this highway. Note this can change over time, so if the smoothness tag is in the OSM feature, it's assumed that value is more accurate.

    The primary purpose is to clean up the TIGER import mess, which is often inaccurate. This leads to navigation problems as sometimes what is in OSM is not what the street sign says. Since there are multiple datasets supplied by government agencies with a good license for OSM, we data mine these through conflation to get the best name and reference number.

    Although most of the fields in these datasets aren't useful for OSM, some are, like whether it is a seasonal road, various off-road vehicle access permissions, etc..., since this is also useful for navigation. Any tags added or edited will follow the OSM Tagging Guidelines for forest roads.

    "},{"location":"highways/#the-datasets","title":"The Datasets","text":"

    The primary source of these datasets is available from the FSGeodata Clearinghouse, which is maintained by the USDA.

    The Topographical map vector tiles are available from here, which is maintained by the National Forest Service.

    These have been partially imported in some areas in the past, complete with the bugs in the original datasets. One big advantage though is that the geometry in OSM was from the same USDA datasets at some point in the past, so it's relatively easy to match the geometries. Conflation then is mostly working through the name and reference fields between multiple files, which sometimes don't agree on the proper name.

    And OpenStreetMap of course.

    "},{"location":"highways/#processing-the-datasets","title":"Processing The Datasets","text":"

    Since the files are very large with different schemas, a critical part of the conflation process is preparing the data. Some of these files are so large neither QGIS nor JOSM can load them without crashing. I use two primary tools for splitting up the files: ogr2ogr for the GeoJson files, and osmium for the OSM XML files. The OSM XML format is required if you want the conflation process to merge the tags into an existing feature. If conflating with OSM data using the GeoJson format, you need to manually cut & paste the new tags onto the existing feature.

    As you further reduce large datasets to smaller, more manageable pieces, this can generate many files. The top level choice is the largest category. I use National Forest boundaries as they can cross state lines.

    All of the datasets have issues with some features lacking a geometry. These appear to be duplicates of a Feature that does have a good geometry. They are also in \"NAD 83 - EPSG:4269\" for the CRS, so need to convert and fix the geometries. I use ogr2ogr to convert the GDB files to GeoJson like this:

    ogr2ogr Road_MVUM.geojson S_USA_Road_MVUM.gdb.zip -makevalid -s_srs EPSG:4269 -t_srs EPSG:4326 -sql 'SELECT * FROM Road_MVUM WHERE SHAPE IS NOT NULL'\n\nogr2ogr Trails_MVUM.geojson S_USA_Trail_MVUM.gdb.zip -makevalid -s_srs EPSG:4269 -t_srs EPSG:4326 -sql 'SELECT * FROM Trail_MVUM WHERE SHAPE IS NOT NULL'\n

    This generates a clean GeoJson file. It has many fields we don't want, so I run a simple conversion program that parses the fields as defined in the original file, and converts the few fields we want for conflation into the OSM equivalent tag/value. For conflation to work really well, all the datasets must use the same schema for the tags and values.

    Since the MVUM dataset covers the entire country, I build a directory tree in which the deeper you go, the smaller the datasets are. I have the National Forest Service Administrative boundaries unpacked into a top level directory. From there I chop the national dataset into just the data for a forest. This is still a large file, but manageable to edit. Sometimes with rural highway mapping, a large area works better. If there are plans to use the Tasking Manager, the files are still too large, as TM has a 5000 sq km limit.

    Next is generating the task boundaries for each national forest that'll be under the 5000 sq km limit. I used the tm-splitter.py program in this project to take the national forest boundary, break it into squares, and clip them properly at the boundary. These task boundary polygons can then be used to create the project in the Tasking Manager, which will further split that into the size you want for mapping.

    Something to be conscious of is these external datasets are also full of obscure bugs. Some of the data I think hasn't been updated since the government discovered digital mapping a few decades ago. The conversion utilities will handle all of these problems in these datasets.

    "},{"location":"highways/#the-openstreetmap-extract","title":"The OpenStreetMap Extract","text":"

    This step is unnecessary if you plan to manually conflate with a GeoJson file, so jump ahead to the next section.

    To conflate against OSM data with the goal of automatically merging the tags into the feature, you have to prepare the dataset. Each feature needs to be validated anyway, and merging tags is more efficient than cut & paste. Since this project is processing data from multiple US states, it exceeds the Overpass data size limit.

    I download the states I want to conflate from Geofabrik, and then use osmium merge to turn it into one big file. I have to do this because most of the national forests cross state lines. You'll get duplicate ID errors if you download these files on different days, so grab all the ones you plan to merge at the same time. Geofabrik updates every 24 hours.

    When dealing with files too large for JOSM or QGIS, osmium is the tool to use. There is also osmfilter and osmconvert which can be used as well. Ogr2ogr can't be used as it can't write the OSM XML format. To merge multiple files with osmium, do this:

    osmium merge --overwrite -o outdata.osm *.osm.pbf\n

    The next step is to delete everything but highways from the OSM XML file. When conflating highways, we don't care about amenities or waterways.

    The preferred data extraction program for conflation is the osmhighways.py program, which has much more fine-grained control, and also replaces the older fixname.py program and fixes the issues when the name field is actually a reference. It also deletes the extraneous tiger:* tags to reduce bloat.

    You can do something similar with osmium tool, but you wind up with extra features and tags which impacts conflation performance.

    osmium tags-filter --overwrite --remove-tags -o outdata.osm indata.osm w/highway=track,service,unclassified,primary,tertiary,secondary,path,residential,abandoned,footway,motorway,trunk\n

    Finally I clip this large file into separate datasets, one for each national forest.

    osmium extract --overwrite --polygon boundary.geojson -o outdata-roads.osm\n

    Then the real fun starts after the drudgery of getting ready to do conflation.

    "},{"location":"highways/#forest-road-names","title":"Forest Road Names","text":"

    The names and reference numbers in OSM now have a wide variety of incorrect tagging when it comes to names. \"Forest Service Road 123.4A\" is not a name, it is a reference number. Same for \"County Road 43\". The fixname.py utility scans the OSM extract and when it sees incorrect tagging, corrects it to the OSM standard. Since the external datasets already follow the same guidelines, this increases the chance of a good match when conflating, since comparing names is part of the process.
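    A hedged sketch of that kind of rename rule; the pattern and function below are illustrative only and not the actual logic in fixname.py or osmhighways.py.

    import re

    def fix_name(tags: dict) -> dict:
        """Move a reference masquerading as a name into ref:usfs."""
        match = re.match(r"^(Forest Service Road|FS Road|FS)\s+([\d.]+\w*)$", tags.get("name", ""))
        if match:
            tags["ref:usfs"] = f"FR {match.group(2)}"
            del tags["name"]
        return tags

    # fix_name({"name": "Forest Service Road 123.4A"}) -> {"ref:usfs": "FR 123.4A"}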

    "},{"location":"highways/#forest-road-reference-numbers","title":"Forest Road Reference Numbers","text":"

    I'm a huge believer that the name and reference number in OSM should match the street sign, since that's often what is used for navigation. Unfortunately the MVUM data has many highways with a .1 suffix, which some street signs don't display. Also, depending on the age of the paper maps or digital files, older maps lack the .1 suffix, but newer datasets do have the .1 suffix. Since a .1 suffix may be a spur road of questionable quality, it's an important detail, so it is included when updating the reference numbers.

    A minor note, the USGS Topographical basemap for JOSM also sometimes lacks the .1 suffix, so can't be used to validate it.

    "},{"location":"highways/#tiger-tag-deletion","title":"TIGER Tag Deletion","text":"

    There is community consensus that the tiger: tags added back in 2008 when the TIGER data was imported are meaningless, so they should be deleted as bloat. The fixnames.py utility used for correcting the names also deletes these from each feature so you don't have to do it manually.

    "},{"location":"highways/#mvum-roads","title":"MVUM Roads","text":"

    This is all the highways in National Forests. The data contains several fields that would be useful in OSM. This dataset has a grading of 1-5 for the type of vehicle that can drive the road, as well as a field for high clearance vehicles only. This is roughly equivalent to the smoothness tag in OSM. The surface type is also included, which is the same as the OSM surface tag. There are other fields for seasonal access, and seasonal road closures. Roads tagged as needing a high clearance vehicle generate a 4wd_only tag for OSM.

    The reference numbers often have a typo, an additional number (often 5 or 7) prefixed to the actual number in the original dataset, and were imported this way. Since the reference number needs to match what the map or street sign says, these all need to be fixed. And there are thousands of these...

    The type of vehicle that can be driven on a particular road is a bit subjective based on one's off-road driving experience. These are typically jeep trails of varying quality, but very useful for back-country rescues or wildland fires.

    "},{"location":"highways/#mvum-trails","title":"MVUM Trails","text":"

    These are Motor Vehicle Use Maps (MVUM), which define the class of vehicle appropriate to drive a road. The trails dataset contains additional highways, as some hiking trails are also forest service roads. These are primarily for hiking, but allow vehicle use, primarily specialized off-road vehicles like an ATV or UTV. They suffer from the same bad data as the MVUM roads.

    "},{"location":"highways/#national-forest-trails","title":"National Forest Trails","text":"

    This dataset is hiking trails that don't allow any vehicle usage at all. Many of these trails are in OSM, but lack the trail name and reference number. These also get used for emergency response as well. If there is a name and reference number for the trail, this makes it easier to refer a location to somebody over a radio instead of GPS coordinates.

    "},{"location":"highways/#usgs-topographical-maps","title":"USGS Topographical maps","text":"

    It's possible to download the vector datasets used to produce topographical maps. Each file covers a single 7.5 map quad, which is 49 miles or 78.85 km square. There are two variants for each quad, a GDB formatted file, and a Shapefile formatted file. The GDB file contains all the data as layers, whereas the Shapefiles have separate files for each feature type. I find the smaller feature based files easier to deal with. The two primary features we want to extract are Trans_RoadSegment and Trans_TrailSegment. Because of the volume of data, I only have a few states downloaded.

    I then used ogrmerge to produce a single file for each feature type from all the smaller files. This file covers an entire state. This file also has many fields we don't need, so we only want the same set used for all the datasets. The usgs.py utility contained in this project is then run to filter the input data file into GeoJson with the OSM tagging schema. The topographical data is especially useful for conflation, since the name and reference number match the paper or GeoPDF maps many people use.

    I found a few problems processing the ShapeFiles due to font encoding issues, and also with converting directly to GeoJson. I do this as a two step process, first make a unified ShapeFile from all the other ShapeFiles, and then convert it to GeoJson, which seems to work best.

    ogrmerge.py -nln highways -single -o highways.shp VECTOR_*/Shape/Trans_Road*.shp -lco ENCODING=\"\"\nogr2ogr highways.geojson highways.shp\n
    "},{"location":"highways/#conflation","title":"Conflation","text":"

    Once all the datasets are broken into manageable pieces, and everything is using the OSM tagging schema, conflation can start. There are two datasets specified: one is the primary, and the other is the secondary. The tag values in the primary will override the values in the secondary file. To be paranoid about the details, when a tag value is overwritten by the primary data source, the current value becomes old_*, i.e. name becomes old_name, and then name is updated to the current value. Sometimes when editing, the difference in the names is due to abbreviations being used, spelling mistakes, etc..., so the old_name can be deleted.

    When conflating multiple datasets, those need to be conflated against each other before conflating with OSM. Since the topographical dataset is what matches a paper map, or GeoPDF, I consider that the primary dataset. The MVUM and trail data are particularly full of mistakes. Sometimes one dataset has a name, and the other doesn't, so conflation here produces that value.

    There are also many, many highways in these areas that in OSM only have highway=something. These are easy to conflate as you are only adding new tags. While in TIGER there are many highway=residential that should really be highway=unclassified or highway=track, it is entirely possible it is a residential road. There are a lot of nice cabins way out in most national forests. But this is the type of thing you'd really need to ground-truth, and luckily it doesn't affect navigation when you are out in a network of unmaintained dirt roads.

    The conflation algorithm is relatively simple at the high level, just find all other highways within a short distance, and then check the slope to eliminate a side road that may be touching. At the lower level, there is a lot of support for dealing with the bugs in the external datasets.

    "},{"location":"highways/#editing-in-josm","title":"Editing in JOSM","text":"

    Unfortunately manually validating the data is very time consuming, but it's important to get it right. I use the TODO plugin and also a data filter so I just select highways. With the TODO plugin, I add the selected features, ideally the entire task. Then I just go through all the features one at a time. When the OSM XML dataset is loaded, nothing will appear in JOSM. This is because the OSM XML file produced by conflation has the refs for the way, but lack the nodes. All it takes is selecting the update modified menu item under the File menu and all the nodes get downloaded, and the highways appear.

    I often have the original datasets loaded as layers, since sometimes it's useful to refer back to when you find issues with the conflation. Much of the existing data in OSM has many unused tags added during the TIGER import. These also get deleted as meaningless bloat. Some were imported with all the tags from the original dataset which also get deleted. This is life as a data janitor...

    Once you've validated all the features in the task, it can be run through the JOSM validator, and if all is good, uploaded to OSM. Often the JOSM validator finds many existing issues. I fix anything that is an error, and mostly ignore all the warnings as that's a whole other project.

    If you are editing with the OSM XML file produced by conflation, when the file is opened, there will be some conflicts. This is usually due to things like the incorrect forest road name getting deleted, since now it's a proper ref:usfs reference number. And the tiger tags are gone as well if the fixnames.py utility is used.

    To fix the conflicts, I just select them all, and click on resolve to my version. Since all the new tags and old tags are preserved, you can edit them directly in the tags window in JOSM. Then I load all the ways into the TODO plugin. You can also use the conflict dialog box to edit the merged tags, but I find the other way more efficient.

    Using the plugin to validate a feature, all I have to do is click on the entry. Sometimes there will be issues that need to be manually fixed. If conflation has changed the name, the old one is still in the feature so a manual comparison can be done. Often validating a feature is just deleting a few tags. But this is the important detail for machine editing. Somebody (not AI) must manually validate each changed feature. This is why the efficiency of mapping is important if you want to update a large area, like an entire national forest.

    Sometimes there are weird typos that have slipped through the process. This is where the time goes, since you have to manually edit the values. But many times for these remote highways you can just mark it as done and go on to the next one. Many of these highways in OSM have no tags beyond highway=track, so no conflicts. This lets you validate a large number of features relatively quickly without sacrificing quality.

    "},{"location":"highways/#editing-osm-xml","title":"Editing OSM XML","text":"

    The conflation process produces an output file in OSM XML format. This file has an incremented version number and action=modify added to the attributes for the feature. When loaded into JOSM, no data is initially visible. If you go to the File menu, go down and execute update modified. This will download all the nodes for the ways, and all the highways will become visible. Highways that already have multiple tags in OSM will become a conflict. These can be resolved more easily in JOSM using the conflict dialog box. No geometries have changed, just tags, so you have to manually select the tags to be merged. Features without tags beyond highway=something merge automatically, which makes validating these features quick and easy. Note that every feature needs to be validated individually.

    "},{"location":"highways/#editing-geojson","title":"Editing GeoJson","text":"

    While JOSM can load and edit GeoJson data, since it is not a native OSM format it can't be automatically merged. Instead, load the GeoJson file and then create a new OSM layer. I select all the highways in the task, and load them into the TODO plugin. Sometimes there are so few highways, I don't use the TODO plugin. I then cut the tags and values for a feature from the GeoJson file, then switch to the OSM layer, and paste the tags into the feature.

    "},{"location":"highways/#validating","title":"Validating","text":"

    Here's an example of the results of a 3 way conflation. This was between the MVUM data, the topographical data, and OSM data.

    • highway=unclassified
    • lanes=2
    • name=Whisky Park Road
    • operator=US Forest Service
    • ref:usfs=FR 503
    • smoothness=good
    • surface=gravel

    Note that the name is spelled wrong.

    "},{"location":"highways/#splitting-highways","title":"Splitting Highways","text":"

    In national forest lands, the reference number changes at every major intersection. Side roads that branch off have an additional modifier added. For example, the main road may be called ref:usfs=\"FR 505\", with a change to ref:usfs=\"FR 505.1\" when it crosses a state line. Spur roads (often to campsites) get a letter attached, so the spur road is ref:usfs=\"FR 505.1A\". Understanding how the reference numbers are assigned makes it easy to transmit your location over a radio or phone, and have somebody looking at a map find that location. Much easier than using GPS coordinates.

    For the highways that were traced off of satellite imagery, there is often a problem with forks in the road. Often tree cover or poor resolution imagery makes it hard to see the highway. And a lot of the highways go through an area with an entire network of other dirt roads, so the reference number may just group a bunch of highway segments. Often the most visible highway branch in the imagery at a fork is not the actual road. In this case the highway has to be split at the fork, the new segment tagged for its actual value, and the actual highway segment gets tagged correctly. This is critical if you want navigation to work.

    "},{"location":"highways/#ground-truthing","title":"Ground-truthing","text":"

    If you really want detailed and accurate maps, ground-truthing is an important part of the process. Road conditions change, especially the unmaintained dirt roads. Years of erosion, off-road vehicle abuse, etc... all change. For this reason the surface, smoothness and tracktype tags are not merged, as what is in the external datasets is likely out of date. Also sometimes parts of a dirt road get paved, or access is closed off completely.

    This is a good excuse to go there for some hiking and camping fun. You can load data into StreetComplete when online, and then use that in the field since there will likely be no cell phone connection. Depending on the software used to collect the data, that may need conflation before uploading, for example OpenDataKit data. Some detail on that process is in this Highway Mapping blog post about a field mapping trip.

    "},{"location":"mvum/","title":"MVUM Conversion","text":"

    The MVUM dataset is all of the motor vehicle roads in a national forest. These are primarily remote dirt roads, often just a jeep track. These are heavily used for back country access for wildland fires and rescues. Currently much of this data has been imported in the past, complete with all the bugs in the dataset.

    This utility program normalizes the data, correcting or flagging bugs as an aid for better conflation.

    The original dataset can be found here on the USDA FSGeodata Clearinghouse website.

    "},{"location":"mvum/#dataset-bugs","title":"Dataset Bugs","text":""},{"location":"mvum/#bad-reference-numbers","title":"Bad Reference Numbers","text":"

    In some areas the MVUM data has had a 5 or a 7 prefixed to the actual reference number. These are all usually in the same area, so I assume whoever was doing data entry had a sticky keyboard, or it got messed up when converting from paper maps to digital, who really knows. But it makes that tag worthless.

    Another common problem in the reference numbers is that in some areas the major maintained roads have a .1 appended. A minor part of the number should always have a letter appended. So \"FR 432.1\" is actually \"FR 432\", whereas \"FR 432.1A\" is correct. This was confirmed by reviewing multiple other map sources, as the paper and PDF versions of the dataset have the correct version without the .1 appended. Obviously this dataset is not used to produce the maps you can get from the Forest Service.

    I do notice that in the original MVUM datasets, whoever emp=3.48 is seems to be the main person with data entry issues. And this seems to apply across the entire western US. Not all highways mapped by 3.48 have this problem, but many do. Chances are other emps have similar issues. I'll keep track, and maybe add the employee ID as a temporary debugging tag in the conflation results. Cleaning up all the wrong reference numbers will make OSM the best map for road and trail navigation on public lands.

    "},{"location":"mvum/#dixie-national-forest","title":"Dixie National Forest","text":"

    In the current MVUM dataset for this national forest, for some reason a 30 has been prefixed to all the IDs, making the reference numbers wrong.

    "},{"location":"mvum/#manti-lasal-national-forest","title":"Manti-LaSal National Forest","text":"

    In the current MVUM dataset for this national forest, for some reason a 5 or 7 has been prefixed to many of the IDs, making the reference numbers wrong.

    "},{"location":"mvum/#fishlake-national-forest","title":"Fishlake National Forest","text":"

    In the current MVUM dataset for this national forest, for some reason a 4 or 40 has been prefixed to some of the IDs, making the reference numbers wrong.

    "},{"location":"mvum/#mount-hood-national-forest","title":"Mount Hood National Forest","text":"

    For some reason, some of the reference numbers have a 000 appended, making the reference numbers wrong. This applies to paved roads, not just remote jeep tracks.

    "},{"location":"mvum/#doesnt-match-the-sign","title":"Doesn't Match The Sign","text":"

    There is an issue with the MVUM reference numbers not matching the sign. This is luckily limited to whether there is a .1 appended to the reference number without a letter at the end. Usually a reference without a .1 is a primary road, and the .1 gets appended for a major branch off that road. While out ground-truthing MVUM roads recently I saw multiple examples where the reference number in the MVUM data (and often in OSM) has the .1, so I use that value regardless of what the sign says. It's still quite obvious what the reference number is, since the only difference is the .1 suffix.

    This gets more interesting when you compare with other data sources, i.e. paper and digital maps. Older data sources seem to drop the .1, whereas the same road in a newer version of the dataset has the .1 suffix. So I figure anyone navigating remote roads who checks their other maps would figure out which way to go. So anyway, when way out on remote very_bad or horrible MVUM roads, you should have multiple maps if you don't want to get confused.

    "},{"location":"mvum/#missing-geometry","title":"Missing Geometry","text":"

    There are features with no geometry at all, but the tags all match an existing feature that does have a geometry. These appear to be accidental duplicates, so they get removed.

    "},{"location":"mvum/#dropped-fields","title":"Dropped Fields","text":"

    These fields are dropped as they aren't useful for OpenStreetMap.

    • TE_CN
    • BMP
    • EMP
    • SYMBOL_CODE
    • SEG_LENGTH
    • JURISDICTION
    • SYSTEM
    • ROUTE_STATUS
    • OBJECTIVE_MAINT_LEVEL
    • FUNCTIONAL_CLASS
    • LANES
    • COUNTY
    • CONGRESSIONAL_DISTRICT
    • ADMIN_ORG
    • SERVICE_LIFE
    • LEVEL_OF_SERVICE
    • PFSR_CLASSIFICATION
    • MANAGING_ORG
    • LOC_ERROR
    • GIS_MILES
    • SECURITY_ID
    • OPENFORUSETO
    • IVM_SYMBOL
    • GLOBALID
    • SHAPE_Length
    "},{"location":"mvum/#preserved-fields","title":"Preserved Fields","text":"

    The field names are a bit truncated in the dataset, but these are the fields that are preserved, mapped to their OSM equivalents (a small sketch of this renaming follows the list).

    • ID is id
    • NAME is name
    • OPER_MAINT_LEVEL is smoothness
    • SYMBOL_NAME is smoothness
    • SURFACE_TYPE is surface
    • SEASONAL is seasonal
    • PRIMARY_MAINTAINER is operator
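    A minimal sketch of that field-to-tag renaming, using the mapping from the list above; the real converter in this project also rewrites the values, as described in the following sections.

    # Field-to-tag mapping taken from the list above.
    FIELD_MAP = {
        "ID": "id",
        "NAME": "name",
        "OPER_MAINT_LEVEL": "smoothness",
        "SYMBOL_NAME": "smoothness",
        "SURFACE_TYPE": "surface",
        "SEASONAL": "seasonal",
        "PRIMARY_MAINTAINER": "operator",
    }

    def rename_fields(properties: dict) -> dict:
        """Keep only the preserved fields, renamed to their OSM equivalents."""
        return {FIELD_MAP[k]: v for k, v in properties.items() if k in FIELD_MAP}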
    "},{"location":"mvum/#abbreviations","title":"Abbreviations","text":"

    There are multiple and somewhat inconsistent abbreviations in the MVUM dataset highway names. OpenStreetMap should be using the full value. These were all found by the conflation software when trying to match names between two features. Since much of the MVUM data is of varying quality, there are probably a few not captured here that will have to be fixed when editing the data. This however improves the conflation results and limits manual editing (a small expansion sketch follows the list below).

    • \" Cr \" is \" Creek \"
    • \" Cr. \" is \" Creek \"
    • \" Crk \" is \" Creek \"
    • \" Cg \" is \" Campground \"
    • \" Rd. \" is \" Road\"
    • \" Mt \" is \" Mountain\"
    • \" Mtn \" is \" Mountain\"
    "},{"location":"mvum/#tag-values","title":"Tag values","text":""},{"location":"mvum/#oper_maint_level","title":"OPER_MAINT_LEVEL","text":"

    This field is used to determine the smoothness of the highway. Conveniently, the Forest Service publishes official Road Maintenance Guidelines for this field, complete with multiple pictures and detailed technical information on each level. To correlate these values, I did some ground-truthing on MVUM roads, and I'd agree that level 2 is definitely high clearance vehicle only, and that it fits the definition here for very_bad, although some sections were more horrible: deeply rutted, big rocks, lots of erosion. A minimal mapping sketch follows the list below.

    • 5 -HIGH DEGREE OF USER COMFORT: Assigned to roads that provide a high degree of user comfort and convenience. This becomes smoothness=excellent.

    • 4 -MODERATE DEGREE OF USER COMFORT: Assigned to roads that provide a moderate degree of user comfort and convenience at moderate travel speeds. This becomes smoothness=bad.

    • 3 -SUITABLE FOR PASSENGER CARS: Assigned to roads open for and maintained for travel by a prudent driver in a standard passenger car. This becomes smoothness=good.

    • 2 -HIGH CLEARANCE VEHICLES: Assigned to roads open for use by high clearance vehicles. This adds 4wd_only=yes and becomes smoothness=very_bad.

    • 1 -BASIC CUSTODIAL CARE (CLOSED): Assigned to roads that have been placed in storage (> one year) between intermittent uses. Basic custodial maintenance is performed. Road is closed to vehicular traffic. This becomes access=no
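
    A minimal sketch of this conversion, assuming the raw field value starts with the level digit (an illustration, not the authoritative mvum.py code):

        def maint_level_to_tags(level: str) -> dict:
            mapping = {
                "5": {"smoothness": "excellent"},
                "4": {"smoothness": "bad"},
                "3": {"smoothness": "good"},
                "2": {"smoothness": "very_bad", "4wd_only": "yes"},
                "1": {"access": "no"},
            }
            # The raw value looks like: 2 -HIGH CLEARANCE VEHICLES
            # so only the leading digit is used here (an assumption).
            return mapping.get(level.strip()[:1], {})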

    "},{"location":"mvum/#symbol_name","title":"SYMBOL_NAME","text":"

    Sometimes OPER_MAINT_LEVEL doesn't have a value, so this is used as a backup. These values are not used to update the existing values in OSM; they are only used for planning ground-truthing trips (see the fallback sketch after the list below).

    • Gravel Road, Suitable for Passenger Car becomes surface=gravel
    • Dirt Road, Suitable for Passenger Car becomes surface=dirt
    • Road, Not Maintained for Passenger Car becomes smoothness=very_bad
    • Paved Road becomes surface=paved
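
    A small sketch of the fallback logic, reusing maint_level_to_tags() from the sketch above; the dictionary keys are the SYMBOL_NAME values listed here:

        SYMBOL_MAP = {
            "Gravel Road, Suitable for Passenger Car": {"surface": "gravel"},
            "Dirt Road, Suitable for Passenger Car": {"surface": "dirt"},
            "Road, Not Maintained for Passenger Car": {"smoothness": "very_bad"},
            "Paved Road": {"surface": "paved"},
        }

        def planning_tags(props: dict) -> dict:
            # Prefer OPER_MAINT_LEVEL, fall back to SYMBOL_NAME when it is empty.
            if props.get("OPER_MAINT_LEVEL"):
                return maint_level_to_tags(props["OPER_MAINT_LEVEL"])
            return SYMBOL_MAP.get(props.get("SYMBOL_NAME", ""), {})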
    "},{"location":"mvum/#surface_type","title":"SURFACE_TYPE","text":"

    This is another field that is converted, but not used when editing the existing OSM feature. Surface can only really be determined by ground-truthing, but it is converted as another aid for route planning (see the sketch after the list below).

    • AGG -CRUSHED AGGREGATE OR GRAVEL becomes surface=gravel
    • AC -ASPHALT becomes surface=asphalt
    • IMP -IMPROVED NATIVE MATERIAL becomes surface=compacted
    • CSOIL -COMPACTED SOIL becomes surface=compacted
    • NAT -NATIVE MATERIAL becomes surface=dirt
    • P - PAVED becomes surface=paved
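
    A short sketch of this conversion, assuming the leading code before the first space is the key; again this only feeds the route-planning output:

        SURFACE_MAP = {
            "AGG": "gravel",
            "AC": "asphalt",
            "IMP": "compacted",
            "CSOIL": "compacted",
            "NAT": "dirt",
            "P": "paved",
        }

        def surface_tag(raw: str) -> dict:
            code = raw.split(" ")[0].strip()
            return {"surface": SURFACE_MAP[code]} if code in SURFACE_MAP else {}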
    "},{"location":"mvum/#name","title":"Name","text":"

    The name is always in all capital letters, so it is converted so that the first letter of every word is upper case and the rest are lower case.
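
    For example, a minimal Python version of that conversion (the sample road name is made up):

        def fix_name(name: str) -> str:
            # Capitalize each word of an all-caps MVUM name.
            return " ".join(word.capitalize() for word in name.split())

        print(fix_name("DEAD HORSE PARK ROAD"))   # -> Dead Horse Park Road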

    "},{"location":"mvum/#options","title":"Options","text":"
    -h, --help            show this help message and exit\n-v, --verbose         verbose output\n-i INFILE, --infile INFILE MVUM data file\n-c, --convert         Convert MVUM feature to OSM feature\n-o OUTFILE, --outfile OUTFILE Output GeoJson file\n
    "},{"location":"odkconflation/","title":"Conflating OpenDataKit with OpenStreetMap","text":"

    Typically conflation is done when doing data imports, but not always. Data collected in the field can be considered an import. Conflating buildings or POIs from external data is relatively easy as it's already been cleaned up and validated. When you are doing field mapping, you have to clean up and validate the data during conflation. This is a time-consuming process even with good conflation software.

    I've worked with multiple conflation programs over the years: Hootenanny, OpenJump (later forked into RoadMatcher), etc., which are now dead projects. Conflation is a hard technical challenge and the results are often poor and unsatisfying. For smaller datasets it's often easier to do manual conflation using JOSM or QGIS. This project tries to simplify the problem by focusing on OpenStreetMap data.

    "},{"location":"odkconflation/#smartphone-data-collection","title":"Smartphone Data Collection","text":"

    While commercial organizations may use expensive GPS devices, most of us who do data collection as volunteers or for an NGO use our smartphones. There is a variety of smartphone apps for data collection that fall into two categories. The first category is apps like Vespucci, StreetComplete, and Organic Maps. These upload directly to OpenStreetMap. These are great for the casual mapper who only adds data occasionally and is limited to a POI. For example, a casual mapper may want to add the restaurant they are currently eating in when they notice it's not in OpenStreetMap. In addition, they probably have a cell phone connection, so the data gets added right away.

    The other category is apps like ODK Collect, QField, and ArcGIS Field Maps, which are oriented to larger scale mapping projects, often used offline without any cellular connection. These collect a lot of data that then needs to get processed later. And conflation is part of this process.

    All of these smartphone based data collection apps suffer from poor GPS location accuracy. Modern smartphones (2024) are often 5-9 meters off the actual location, sometimes worse. In addition when field data collecting, you can't always record the actual location you want, you can only record where you are standing.

    You can improve the location data somewhat if you have a good quality basemap, for example when you see a building within a courtyard wall while you are standing in the street. If you have a basemap, typically satellite imagery, you can touch the location on the basemap and use that instead of where you are standing. Then later the conflation process has a much higher chance of being less painful.

    "},{"location":"odkconflation/#opendatakit","title":"OpenDataKit","text":"

    OpenDataKit is a format for data import forms used to collect custom data. The source file is a spreadsheet, called an XLSForm. This gets used by the mobile app for the question and answer process defined by the XLSForm. There are multiple apps and projects using XLSForms, so it's well supported and maintained.

    The XLSForm source file syntax is a bit weird at first, being a spreadsheet, so the osm-fieldwork project contains tested XLSForm templates for a variety of mapping project goals. These can be used to create efficient XForms that are easy to convert to OSM. The primary task when manually converting ODK collected data into OSM format is converting the tags. If the XLSForm is created with a focus towards OSM, this becomes a much simpler process. This is detailed more in this document. Simply stated, what is in the name column in the XLSForm becomes the name of the tag in OSM, and the response from the choices sheet becomes the value.
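
    As a tiny sketch of why this matters, a submission built from an OSM-oriented XLSForm is nearly a pass-through; the field names and the renaming map below are hypothetical:

        def odk_to_osm_tags(submission: dict) -> dict:
            # With OSM tagging in the XLSForm, only minor renaming is needed.
            rename = {"building_material": "building:material"}
            return {rename.get(k, k): v for k, v in submission.items()}

        print(odk_to_osm_tags({"amenity": "restaurant", "building_material": "brick"}))
        # -> {'amenity': 'restaurant', 'building:material': 'brick'}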

    "},{"location":"odkconflation/#odk-collect-central","title":"ODK Collect & Central","text":"

    ODK Collect is a mobile app for data collection using XLSForms. Its server side is ODK Central, which replaces the older ODK Aggregate. ODK Central manages the XLSForms downloaded to your phone, as well as the submissions uploaded from your phone when back online.

    A related project for processing ODK data and working remotely with Central is osm-fieldwork. This Python project handles conversion of the various data files from Collect or Central, into OSM XML and GeoJson for future processing via editing or conflation. This is heavily used in the FMTM backend.

    "},{"location":"odkconflation/#field-data-collection","title":"Field Data Collection","text":"

    Collecting data in the field is the best way to add data to OpenStreetMap. Whether done by casual mappers adding POIs or by more dedicated mappers, what is reality at that moment is the key to keeping OSM fresh and updated. When it comes to improving the metadata for buildings, many have been imported with building=yes from remote mapping using the HOT Tasking Manager to trace buildings from satellite imagery.

    But ground-truthing what kind of building it is improves the map. It may be a medical clinic, restaurant, residence, etc... who knows until somebody stands in front of the building to collect more information about it. This may be identifying it as a clinic or residence, adding the building material, what the roof is made of, whether its power is non-existent, or whether there are solar panels or a generator. Some humanitarian mapping is collecting data on public toilets and community water sources for future improvements.

    Knowing there is a building on the map is useful, but better yet is knowing what the building is used for. What is it made of? Does it have AC or DC power? Is water available? All of these details improve the map to make it more useful to others.

    "},{"location":"odkconflation/#field-mapping-camping-manager","title":"Field Mapping Camping Manager","text":"

    The Field Mapping Tasking Manager (FMTM) is a project to organize large scale data collection using ODK Collect and ODK Central. It uses the osm-fieldwork project for much of the backend processing of the ODK data, but is designed for large scale field mapping involving many people. It uses ODK Collect and ODK Central as the primary tools. One of the final steps in processing ODK data to import into OSM is conflating it with existing data. This can be done manually of course, but with a large number of data submissions this becomes tedious and time consuming. FMTM aggregates all the data for an entire project, and may have thousands of submissions. This is where conflation is critical.

    "},{"location":"odkconflation/#the-algorythm","title":"The Algorythm","text":"

    Currently conflation is focused on conflating ODK with OSM. This uses the conflator.py program, which can conflate between the ODK data and an OSM data extract. There are other conflation programs in this project for other external datasets, but they use a postgres database instead of two files.

    "},{"location":"odkconflation/#the-conflator-class","title":"The Conflator() Class","text":"

    This is the primary interface for conflating files. It has two primary endpoints. The top level endpoint is Conflator.conflateFiles(), which is used when the conflator program is run standalone. It opens the two disk files, parses the various formats, and generates a data structure used for conflation. This class uses the Parsers() class from osm-fieldwork that can parse the JSON or CSV files downloaded from ODK Central, or the ODK XML \"instance\" files when working offline. OSM XML or GeoJson files are also supported. Each entry in the files is turned into a list of Python dicts to make it easier to compare the data.

    Once the two files are read, the Conflator.conflateFeatures() endpoint takes the two lists of data and does the actual conflation. There is an additional parameter passed to this endpoint that is the threshold distance. This is used to find all features in the OSM data extract within that distance. Note that this is in degrees of the earth's circumference, not meters, so distance calculations are a bit fuzzy.
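
    As a rough rule of thumb for picking a threshold, one degree of latitude is about 111,320 meters, so a GPS error of a few meters translates to a very small degree value. This helper is an illustration only, not part of the Conflator() API:

        def meters_to_degrees(meters: float) -> float:
            # Approximate: ignores longitude scaling with latitude.
            return meters / 111320.0

        print(meters_to_degrees(7.0))   # ~6.3e-05 degrees for ~7 m of GPS error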

    This is a brute force conflation algorithm, not fast but it tries to be complete. It consists of two loops. The top level loops through the ODK data. For each ODK data entry, it finds all the OSM features within that threshold distance. The inner loop then uses the closest feature and compares the tags. This is where things get interesting... If there is a name tag in the ODK data, this is string compared with the name in the closest OSM feature. Fuzzy string matching is used to handle minor spelling differences. Sometimes the mis-spelling is in the OSM data, but often when entering names of features on your smartphone, mis-typing occurs. If there is a 100% match in the name tags, then chances are the feature exists in OSM already.
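
    A highly simplified sketch of those two loops, using point features only and the standard library difflib for fuzzy name matching; the real Conflator.conflateFeatures() handles linestrings, polygons, and many more tag comparisons:

        from difflib import SequenceMatcher
        from math import hypot

        def name_similarity(a: str, b: str) -> float:
            return SequenceMatcher(None, a.lower(), b.lower()).ratio()

        def conflate(odk_features: list, osm_features: list, threshold: float) -> list:
            results = []
            for new in odk_features:                    # outer loop: ODK entries
                nx, ny = new["geometry"]["coordinates"]
                nearby = []
                for old in osm_features:                # inner loop: OSM extract
                    ox, oy = old["geometry"]["coordinates"]
                    dist = hypot(nx - ox, ny - oy)      # degrees, not meters
                    if dist <= threshold:
                        nearby.append((dist, old))
                if not nearby:
                    results.append(("new", new, None))
                    continue
                dist, closest = min(nearby, key=lambda pair: pair[0])
                new_name = new["properties"].get("name", "")
                old_name = closest["properties"].get("name", "")
                if new_name and old_name and name_similarity(new_name, old_name) > 0.9:
                    results.append(("duplicate", new, closest))
                else:
                    results.append(("possible", new, closest))
            return results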

    If there is no name tag in the ODK data, then the other tags are compared to try to find a possible duplicate feature. For example, a public toilet at a trailhead has no name, but if both ODK and OSM have amenity=toilet, then it's very likely a duplicate. If no tags match, then the ODK data is probably a new feature.

    Any time a possible duplicate is found, it is not automatically merged. Instead a fixme tag is added to the feature in the output file with a statement that it is potentially a duplicate. When the output file is loaded into JOSM, you can search for this tag to manually decide if it is a duplicate.

    "},{"location":"odkconflation/#xlsform-design","title":"XLSForm Design","text":"

    A key detail for improving conflation is a carefully created XLSForm. There is much more detailed information on XLSForm design, but briefly, whatever is in the name column in the survey sheet becomes the name of the tags, and whatever is in the name column in the choices sheet becomes the value. If you want a relatively smooth conflation, make sure your XLSForm uses OSM tagging schemas.

    If you don't follow OSM tagging, then conflation will assume all your ODK data is a new feature, and you'll have to manually conflate the results using JOSM. That's OK for small datasets, but quickly becomes very tedious for the larger datasets that FMTM collects.

    "},{"location":"odkconflation/#the-output-file","title":"The Output File","text":"

    The output file must be in OSM XML to enable updating the ways. If the OSM data is a POI, viewing it in JOSM is easy. If the OSM data is a polygon, when loaded into JOSM, they won't appear at first. Since the OSM way created by conflation has preserved the refs used by OSM XML to reference the nodes, doing update modified in JOSM then pulls down the nodes and all the polygons will appear.

    "},{"location":"odkconflation/#conflicts","title":"Conflicts","text":"

    There are some interesting issues to fix post conflation. ODK data is usually a single POI, whereas in OSM it may be a polygon. Sometimes though the POI is already in OSM. Remote mapping or building footprint imports often have a polygon with a single building=yes tag, while the POI we collected in ODK has more data, for example that this building is a restaurant serving pizza and is made of brick.

    In OSM sometimes there is a POI for an amenity, as well as a building polygon, that were added at different times by different people. The key detail for conflation is: do any of the tags and values from the new data match the existing data?

    FMTM downloads a data extract from OSM using osm-rawdata, and then filters the data extract based on what is on the choices sheet of the XLSForm. Otherwise Collect won't launch. Because this data extract does not contain all the tags that are in OSM, it creates conflicts. This problem is FMTM specific, and can be improved by making a more complete data extract from OSM.

    When the only tag in the OSM data is building=, any tags from ODK are merged with the building polygon when possible. If the OSM feature has other tags, JOSM will flag this as a conflict. Then you have to manually merge the tags in JOSM.

    "},{"location":"osm-merge/","title":"Conflator Program","text":"

    osm-merge is a program that conflates building footprint data with OpenStreetMap data to remove duplicates. The result of the conflation process is buildings that only exist in the footprints data file.

    This program can process data from either a postgres database, or data files in GeoJson or Shapefile format. One of the core concepts is using a data file of polygons to filter the larger datasets, since a database may contain multiple countries.

    The process of setting up for large scale conflation is in this document.

    "},{"location":"osm-merge/#command-line-options","title":"Command Line Options","text":""},{"location":"osm-merge/#common-options","title":"Common Options","text":"

    These are the most commonly used options.

    --help(-h)       Get command line options\n--verbose(-v)    Enable verbose output\n--boundary(-b)   Specify a multipolygon for boundaries, one file for each polygon\n--project(-p)    Tasking Manager project ID to get boundaries from database\n--osmdata(-x)    OSM XML/PBF or OSM database to get boundaries (prefix with pg: if database)\n--outdir(-o)     Output file prefix for output files (default \"/tmp/tmproject-\")\n--footprints(-f) File or building footprints Database URL (prefix with pg: if database)\n--dbhost(-d)     Database host, defaults to \"localhost\"\n--dbuser(-u)     Database user, defaults to current user\n--dbpass(-w)     Database password, defaults to no password needed\n
    "},{"location":"osm-merge/#tasking-manager-options","title":"Tasking Manager Options","text":"

    These options are used to dynamically extract a project boundary from a Tasking Manager database. A more common usage is to use the splitter.py program to download the project boundary from the Tasking Manager itself.

    --splittasks     When using the Tasking Manager database, split into tasks\n--schema         OSM database schema (pgsnapshot, ogr2ogr, osm2pgsql) defaults to \"pgsnapshot\"\n--tmdata(-t)     Tasking Manager database to get boundaries if no boundary file prefix with pg: for database usage, http for REST API\n
    "},{"location":"osm-merge/#osm-options","title":"OSM Options","text":"

    When extracting administrative boundaries from an OpenStreetMap database, the default admin level is 4, which is commonly used for county boundaries. This lets the user select what level of administrative boundaries they want.

    --admin(-a)      When querying the OSM database, this is the admin_level, (defaults to 4)\n
    "},{"location":"osm-merge/#examples","title":"Examples","text":"

    PATH/conflator.py -v -x 12057-osm.geojson -f 12057-ms.geojson -o 12057

    This takes two disk files, which have already been filtered to only contain data for the area to conflate.

    PATH/conflator.py -v -x pg:kenya -b 12007-project.geojson -f 12057-ms.geojson -o 12057

    This uses a database that contains all of Kenya, but we only want to process a single project, so that's supplied as the boundary. The footprint data was already filtered using ogr2ogr, and the project ID is used as the prefix for the output files.

    PATH/conflator.py -v -x pg:kenya -b 12007-project.geojson -f pg:kenya_footprints -o 12057 -d mapdb -u me

    This is the same except the database is on a remote machine called mapdb and the user needs to be me.

    PATH/conflator.py -t tmsnap -p 8345 -b pg:kenya_foot -o pg:Kenya

    Reads from 3 data sources. The first one is a snapshot of the Tasking Manager database, and we want to use project 8345 as the boundary. The other two data sources are prefixed with \"pg\", which defines them as a database URL instead of a file. The database needs to be running locally in this case.

    "},{"location":"trails/","title":"National Park Service Trails","text":"

    This processes both the National Park Service trails dataset and the National Forest Service trail datasets. The schema of the two datasets is very similar. One of the differences is that the Park Service trails get two default tags in the output file, which are bicycle=no and motor_vehicle=no. These default tags are documented here.

    This dataset is available in a variety of formats from the ArcGIS Hub.

    "},{"location":"trails/#processed-fields","title":"Processed Fields","text":"

    These are the fields extracted from the data that are converted to OpenStreetMap syntax so they can be conflated.

    • OBJECTID becomes id
    • TRLNAME becomes name
    • TRLCLASS becomes sac_scale
    • TRLUSE becomes yes for horse, bicycle, atv, etc...
    • TRLALTNAME becomes alt_name
    • SEASONAL becomes seasonal
    • MAINTAINER becomes operator
    • TRLSURFACE becomes surface
    "},{"location":"trails/#dropped-fields","title":"Dropped Fields","text":"

    These fields are all ignored, and are dropped from the output file.

    • MAPLABEL
    • TRLSTATUS
    • TRLTYPE
    • PUBLICDISP
    • DATAACCESS
    • ACCESSNOTE
    • ORIGINATOR
    • UNITCODE
    • UNITNAME
    • UNITTYPE
    • GROUPCODE
    • GROUPNAME
    • REGIONCODE
    • CREATEDATE
    • EDITDATE
    • LINETYPE
    • MAPMETHOD
    • MAPSOURCE
    • SOURCEDATE
    • XYACCURACY
    • GEOMETRYID
    • FEATUREID
    • FACLOCID
    • FACASSETID
    • IMLOCID
    • OBSERVABLE
    • ISEXTANT
    • OPENTOPUBL
    • ALTLANGNAM
    • ALTLANG
    • NOTES
    "},{"location":"trails/#national-forest-service-trails","title":"National Forest Service Trails","text":"

    The US Forest Service makes much of their data publicly accessible, so it's been a source for imports for a long time. There is a nice detailed wiki page on the Forest Service Data. The conversion process handles most of the implementation details.

    "},{"location":"trails/#keep-fields","title":"Keep Fields","text":"

    The two primary fields are TRAIL_NO, which is used for the ref:usfs tag, and TRAIL_NAME, which is the name of the trail. In addition to these, the access-related fields described below are also converted.

    "},{"location":"trails/#the-5-variations","title":"The 5 Variations","text":"

    For many of the feature classes, there are 5 variations of each one, which are used for access.

    • Managed: Usage allowed and managed by the forest service
    • Accepted: Usage is accepted year round
    • Accepted/Discouraged: Usage is accepted, but discouraged
    • Restricted: Usage is restricted
    • Discouraged: Usage is discouraged

    These are converted to the appropriate value.

    • Managed* sets the keyword to designated
    • Accepted* sets the keyword to yes
    • Restricted* sets the keyword to no
    • Discouraged* sets the keyword to discouraged
    • Accepted/Discouraged* sets the keyword to permissive

    Many of the values for these are NULL, so they are ignored when generating the output file. If the value exists, it's either a Y or a N, which is used to set the values. For example: \"SNOWMOBILE\": \"Y\" becomes snowmobile=yes in the output file (a small conversion sketch follows the list below).

    • PACK_SADDLE_ becomes horse=
    • BICYCLE_ becomes bicycle=
    • MOTORCYCLE_ becomes motorcycle=
    • ATV_ becomes atv=
    • FOURWD_ becomes 4wd_only=
    • SNOWMOBILE_ becomes snowmobile=
    • SNOWSHOE_ becomes snowshoe=
    • XCOUNTRY_SKI_ becomes ski=
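
    A small sketch of that conversion; the trailing underscore on the column names follows the list above, and real dataset columns may carry additional suffixes (an assumption for this illustration):

        ACCESS_MAP = {
            "PACK_SADDLE_": "horse",
            "BICYCLE_": "bicycle",
            "MOTORCYCLE_": "motorcycle",
            "ATV_": "atv",
            "FOURWD_": "4wd_only",
            "SNOWMOBILE_": "snowmobile",
            "SNOWSHOE_": "snowshoe",
            "XCOUNTRY_SKI_": "ski",
        }

        def access_tags(props: dict) -> dict:
            tags = {}
            for column, tag in ACCESS_MAP.items():
                value = props.get(column)
                if value == "Y":
                    tags[tag] = "yes"
                elif value == "N":
                    tags[tag] = "no"
            return tags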

    Currently these fields appear to be empty, but that may change in the future.

    • SNOWCOACH_SNOWCAT_
    • E_BIKE_CLASS1_
    • E_BIKE_CLASS2_
    • E_BIKE_CLASS3_

    This field is ignored as it's assumed the trail is accessible by hikers.

    • HIKER_PEDESTRIAN_
    "},{"location":"trails/#dropped-fields_1","title":"Dropped Fields","text":"

    These fields are dropped as unnecessary for OSM. Many only have a NULL value anyway, so they're useless.

    • MOTOR_WATERCRAFT_
    • NONMOTOR_WATERCRAFT_
    • GIS_MILES
    • Geometry Column
    • TRAIL_TYPE
    • TRAIL_CN
    • BMP
    • EMP
    • SEGMENT_LENGTH
    • ADMIN_ORG
    • MANAGING_ORG
    • SECURITY_ID
    • ATTRIBUTESUBSET
    • NATIONAL_TRAIL_DESIGNATION
    • TRAIL_CLASS
    • ACCESSIBILITY_STATUS
    • TRAIL_SURFACE
    • SURFACE_FIRMNESS
    • TYPICAL_TRAIL_GRADE
    • TYPICAL_TREAD_WIDTH
    • MINIMUM_TRAIL_WIDTH
    • TYPICAL_TREAD_CROSS_SLOPE
    • SPECIAL_MGMT_AREA
    • TERRA_BASE_SYMBOLOGY
    • MVUM_SYMBOL
    • TERRA_MOTORIZED
    • SNOW_MOTORIZED
    • WATER_MOTORIZED
    • ALLOWED_TERRA_USE
    • ALLOWED_SNOW_USE
    "},{"location":"trails/#options","title":"Options","text":"
    -h, --help            show this help message and exit\n-v, --verbose         verbose output\n-i INFILE, --infile INFILE input data file\n-c, --convert         Convert feature to OSM feature\n-o OUTFILE, --outfile OUTFILE Output GeoJson file\n
    "},{"location":"usgs/","title":"US Topographical Data","text":""},{"location":"usgs/#us-topographical-trails","title":"US Topographical Trails","text":"
    • OBJECTID
    • permanenti
    • name
    • namealtern
    • trailnumbe
    • trailnum_1
    • sourcefeat
    • sourcedata
    • sourceda_1
    • sourceorig
    • loaddate
    • trailtype
    • hikerpedes
    • bicycle
    • packsaddle
    • atv
    • motorcycle
    • ohvover50i
    • snowshoe
    • crosscount
    • dogsled
    • snowmobile
    • nonmotoriz
    • motorizedw
    • primarytra
    • nationaltr
    • lengthmile
    • networklen
    • SHAPE_Leng
    "},{"location":"usgs/#us-topographical-highways","title":"US Topographical Highways","text":"
    • OBJECTID
    • permanent_
    • source_fea
    • source_dat
    • source_d_1
    • source_ori
    • loaddate
    • interstate
    • us_route
    • state_rout
    • county_rou
    • federal_la
    • stco_fipsc
    • tnmfrc
    • name
    • mtfcc_code
    • intersta_1
    • intersta_2
    • intersta_3
    • us_route_a
    • us_route_b
    • us_route_c
    • state_ro_1
    • state_ro_2
    • state_ro_3
    • SHAPE_Leng
    "},{"location":"utilities/","title":"Utility Programs","text":"

    To conflate external datasets with OSM, the external data needs to be converted to the OSM tagging schema. Otherwise comparing tags gets very convoluted. Since every dataset uses a different schema, a few utility programs are included for converting external datasets. Currently the only datasets are for highways. These datasets are available from the USDA, and have an appropriate license to use with OpenStreetMap. Indeed, some of this data has already been imported. The files are available from the FSGeodata Clearinghouse.

    Most of the fields in the dataset aren't needed for OSM, only the reference number if it has one, and the name. Most of these highways are already in OSM, but it's a bit of a mess, and mostly unvalidated. Most of the problems are related to the TIGER import in 2007. So the goal of these utilities is to aid the TIGER fixup work by updating or adding the name and a reference number. These utilities prepare the dataset for conflation.

    There are other fields in the datasets we might want, like surface type, is it 4wd only, etc... but often the OSM data is more up to date. And to really get that right, you need to ground truth it.

    "},{"location":"utilities/#mvumpy","title":"mvum.py","text":"

    This converts the Motor Vehicle Use Map (MVUM) dataset that contains data on highways more suitable for offroad vehicles. Some require specialized offroad vehicles like a UTV or ATV. The data in OSM for these roads is really poor. Often the reference number is wrong, or lacks the suffix. We assume the USDA data is correct when it comes to name and reference number, and this gets handled later by conflation.

    "},{"location":"utilities/#roadcorepy","title":"roadcore.py","text":"

    This converts the Road Core vehicle map. This contains data on all highways in a national forest. It's similar to the MVUM dataset.

    "},{"location":"utilities/#trailspy","title":"trails.py","text":"

    This converts the NPSPublish Trail dataset. These are hiking trails not open to motor vehicles. Currently much of this dataset has empty fields, but the trail name and reference number are useful. This utility is to support the OpenStreetMap US Trails Initiative.

    "},{"location":"utilities/#usgspy","title":"usgs.py","text":"

    This converts the raw data used to print Topographical maps in the US. This obviously is a direct source when it comes to names if you want to be accurate. Although things do change over time, so you still have to validate it all. The files are available from the National Map. I use the Shapefiles, as the different categories are in separate files inside the zip. Each one covers a 7.5 quad square on a topo map. These have to be merged together into a single file to be practical.

    "},{"location":"utilities/#osmhighwayspy","title":"osmhighways.py","text":"

    On the OSM wiki, there is a list of incorrect tagging for forest highway names. Basically the name shouldn't be something like \"Forest Service Road 123.4A\". That's actually a reference number, not a name. This is primarily a problem with existing OSM data. These would all have to get manually fixed when validating in JOSM, so this program automates the process so you only have to validate, and not edit the feature. This also extracts only highway linestrings, so it is used to create the OSM dataset for conflation. Since the other external datasets also correctly use name, ref, and ref:usfs, this simplifies conflation. Otherwise the algorithm would get very complicated and hard to maintain.
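
    A minimal sketch of the kind of fix osmhighways.py automates, moving a name that is really a reference number into ref:usfs; the regex and the FR prefix are illustrative assumptions, not the exact implementation:

        import re

        REF_AS_NAME = re.compile(
            r"^(Forest Service|Forest|FS|USFS)\s+(Road|Rd\.?)\s+(?P<ref>[\d.]+[A-Z]?)$",
            re.IGNORECASE,
        )

        def fix_highway_tags(tags: dict) -> dict:
            match = REF_AS_NAME.match(tags.get("name", ""))
            if match:
                tags = dict(tags)
                del tags["name"]
                tags.setdefault("ref:usfs", "FR " + match.group("ref"))
            return tags

        print(fix_highway_tags({"highway": "track", "name": "Forest Service Road 123.4A"}))
        # -> {'highway': 'track', 'ref:usfs': 'FR 123.4A'}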

    "},{"location":"wiki_redirect/","title":"OSM RawData","text":"

    Please see the docs page at: https://hotosm.github.io/conflator/

    "},{"location":"zion/","title":"Analyzing Zion National Park Trails","text":"

    As an aid to debugging my conflation software, I decided to use Zion National Park trail data. This involved two external datasets, USGS vector topographical maps and the National Park Service trails dataset. The topographical maps are in ShapeFile format, the NPS trails are in GeoJson.

    The topographical dataset has many more attributes than the NPS dataset. For example, the topo dataset contains access information, which is one of the goals of the Trail Access Project. One of the details I noticed was having a value of designated instead of yes if the trail is in an official source. There are multiple access types, horse, bicycles, etc... having them all be no might be useless data, as that could be assumed when access isn't explicitly allowed.

    \"properties\": {\n    \"highway\": \"path\",\n    \"source\": \"National Park Service\",\n    \"bicycle\": \"no\",\n    \"atv\": \"no\",\n    \"horse\": \"designated\",\n    \"motorcycle\": \"no\",\n    \"snowmobile\": \"no\"\n    },\n
    "},{"location":"zion/#conflating-with-openstreetmap","title":"Conflating with OpenStreetMap","text":"

    One big difference is that the OpenStreetMap dataset has many more features tagged with highway than the other datasets. OSM has much more detail: campground loop roads, service roads, etc...

    Topo Trails:

    • Coalpits Wash Trail (official)
    • Dalton Wash Trail (BLM ?)
    • Huber Wash Trail (not sure)
    • Left Fork North Creek Trail aka Subway (official)

    The Subway (Bottom) in Topo and Left Fork North Creek Trail in OSM

    Pa'rus Trail is same in topo and nps, not in OSM.

    Deertrap Mountain Trail, or Cable Mountain.

    • nps:COMMENT=062904-GPSed for cultural projects coverage
    • nps:EDIT_DATE=082004
    • nps:ED_COMMENT=063004-removed spikes from arc
    • nps:MILES=0.182262

    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"OSM Merge","text":"

    Merge features and tags into existing OSM data.

    \ud83d\udcd6 Documentation: https://hotosm.github.io/osm-merge/

    \ud83d\udda5\ufe0f Source Code: https://github.com/hotosm/osm-merge

    "},{"location":"#background","title":"Background","text":"

    This is a project for conflating map data, with the ultimate goal of importing it into OpenStreetMap. It is oriented towards processing non-OSM external datasets.

    This project uses a huge amount of data (and disk space) if you start from the original nationwide datasets, which are too large to edit. There is a contrib script in the git sources I use to start breaking down the huge files into manageable pieces.

    The goal of this project is two-fold. One is to support field data collection using OpenDataKit. The osm-fieldwork project can be used to convert the ODK data files into GeoJson and OSM XML. This project then supports conflating that field collected data with current OpenStreetMap data. Otherwise this is a time-consuming process to do manually.

    The other goal is focused on emergency access in remote areas, by improving the Motor Vehicle Use Map (MVUM) datasets of all highways (mostly jeep trails) in OpenStreetMap. The current data in OSM was often imported complete with bugs from the original dataset, or the only detail is highway=track. All of these roads have a US Forest Service reference number and name. Adding those makes it much easier to communicate a location.

    "},{"location":"#programs","title":"Programs","text":""},{"location":"#conflatorpy","title":"conflator.py","text":"

    This program doesn't require a database, unlike the other conflation programs. It is focused on conflating OpenDataKit with OpenStreetMap, as well as conflating rural highways. It'll conflate any two datasets in either GeoJson or OSM XML format. It is currently under heavy development and debugging by processing large amounts of data to track down all the obscure bugs in the original datasets, or in the conflation process.

    "},{"location":"#the-data","title":"The Data","text":"

    Much of the process of conflation is splitting huge datasets into manageably sized files. Since that process is mostly automated, I have a collection of files where I have done that part. Since conflation also requires converting the original datasets, the original files are included, along with the files converted to OSM XML & GeoJson, and the results of conflation. Not all the national forests and parks have been conflated yet, but the data is there for others that may wish to try. The Map Data is on a slow server, sorry. Disk space is cheaper than network bandwidth.

    "},{"location":"CHANGELOG/","title":"Changelog","text":""},{"location":"LICENSE/","title":"GNU AFFERO GENERAL PUBLIC LICENSE","text":"

    Version 3, 19 November 2007

    Copyright (C) 2007 Free Software Foundation, Inc. https://fsf.org/

    Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

    "},{"location":"LICENSE/#preamble","title":"Preamble","text":"

    The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software.

    The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users.

    When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.

    Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software.

    A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public.

    The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version.

    An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license.

    The precise terms and conditions for copying, distribution and modification follow.

    "},{"location":"LICENSE/#terms-and-conditions","title":"TERMS AND CONDITIONS","text":""},{"location":"LICENSE/#0-definitions","title":"0. Definitions.","text":"

    \"This License\" refers to version 3 of the GNU Affero General Public License.

    \"Copyright\" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.

    \"The Program\" refers to any copyrightable work licensed under this License. Each licensee is addressed as \"you\". \"Licensees\" and \"recipients\" may be individuals or organizations.

    To \"modify\" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a \"modified version\" of the earlier work or a work \"based on\" the earlier work.

    A \"covered work\" means either the unmodified Program or a work based on the Program.

    To \"propagate\" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.

    To \"convey\" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.

    An interactive user interface displays \"Appropriate Legal Notices\" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.

    "},{"location":"LICENSE/#1-source-code","title":"1. Source Code.","text":"

    The \"source code\" for a work means the preferred form of the work for making modifications to it. \"Object code\" means any non-source form of a work.

    A \"Standard Interface\" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.

    The \"System Libraries\" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A \"Major Component\", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.

    The \"Corresponding Source\" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.

    The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.

    The Corresponding Source for a work in source code form is that same work.

    "},{"location":"LICENSE/#2-basic-permissions","title":"2. Basic Permissions.","text":"

    All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.

    You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.

    Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary.

    "},{"location":"LICENSE/#3-protecting-users-legal-rights-from-anti-circumvention-law","title":"3. Protecting Users' Legal Rights From Anti-Circumvention Law.","text":"

    No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.

    When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures.

    "},{"location":"LICENSE/#4-conveying-verbatim-copies","title":"4. Conveying Verbatim Copies.","text":"

    You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.

    You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.

    "},{"location":"LICENSE/#5-conveying-modified-source-versions","title":"5. Conveying Modified Source Versions.","text":"

    You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:

    • a) The work must carry prominent notices stating that you modified it, and giving a relevant date.
    • b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to \"keep intact all notices\".
    • c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.
    • d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.

    A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an \"aggregate\" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.

    "},{"location":"LICENSE/#6-conveying-non-source-forms","title":"6. Conveying Non-Source Forms.","text":"

    You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:

    • a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.
    • b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.
    • c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.
    • d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.
    • e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.

    A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.

    A \"User Product\" is either (1) a \"consumer product\", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, \"normally used\" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.

    \"Installation Information\" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.

    If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).

    The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.

    Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.

    "},{"location":"LICENSE/#7-additional-terms","title":"7. Additional Terms.","text":"

    \"Additional permissions\" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.

    When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.

    Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:

    • a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or
    • b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or
    • c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or
    • d) Limiting the use for publicity purposes of names of licensors or authors of the material; or
    • e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or
    • f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.

    All other non-permissive additional terms are considered \"further restrictions\" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.

    If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.

    Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.

    "},{"location":"LICENSE/#8-termination","title":"8. Termination.","text":"

    You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).

    However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.

    Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.

    Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.

    "},{"location":"LICENSE/#9-acceptance-not-required-for-having-copies","title":"9. Acceptance Not Required for Having Copies.","text":"

    You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.

    "},{"location":"LICENSE/#10-automatic-licensing-of-downstream-recipients","title":"10. Automatic Licensing of Downstream Recipients.","text":"

    Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.

    An \"entity transaction\" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.

    You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.

    "},{"location":"LICENSE/#11-patents","title":"11. Patents.","text":"

    A \"contributor\" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's \"contributor version\".

    A contributor's \"essential patent claims\" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, \"control\" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.

    Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.

    In the following three paragraphs, a \"patent license\" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To \"grant\" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.

    If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. \"Knowingly relying\" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.

    If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.

    A patent license is \"discriminatory\" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.

    Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.

    "},{"location":"LICENSE/#12-no-surrender-of-others-freedom","title":"12. No Surrender of Others' Freedom.","text":"

    If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.

    "},{"location":"LICENSE/#13-remote-network-interaction-use-with-the-gnu-general-public-license","title":"13. Remote Network Interaction; Use with the GNU General Public License.","text":"

    Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph.

    Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License.

    "},{"location":"LICENSE/#14-revised-versions-of-this-license","title":"14. Revised Versions of this License.","text":"

    The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.

    Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License \"or any later version\" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation.

    If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program.

    Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.

    "},{"location":"LICENSE/#15-disclaimer-of-warranty","title":"15. Disclaimer of Warranty.","text":"

    THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

    "},{"location":"LICENSE/#16-limitation-of-liability","title":"16. Limitation of Liability.","text":"

    IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

    "},{"location":"LICENSE/#17-interpretation-of-sections-15-and-16","title":"17. Interpretation of Sections 15 and 16.","text":"

    If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.

    END OF TERMS AND CONDITIONS

    "},{"location":"LICENSE/#how-to-apply-these-terms-to-your-new-programs","title":"How to Apply These Terms to Your New Programs","text":"

    If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

    To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the \"copyright\" line and a pointer to where the full notice is found.

        <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU Affero General Public License as\n    published by the Free Software Foundation, either version 3 of the\n    License, or (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU Affero General Public License for more details.\n\n    You should have received a copy of the GNU Affero General Public License\n    along with this program.  If not, see <https://www.gnu.org/licenses/>.\n

    Also add information on how to contact you by electronic and paper mail.

    If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a \"Source\" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements.

    You should also get your employer (if you work as a programmer) or school, if any, to sign a \"copyright disclaimer\" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see https://www.gnu.org/licenses/.

    "},{"location":"about/","title":"Conflator","text":"

    This is a project for conflating map data, with the ultimate goal of importing it into OpenStreetMap (OSM).

    It is oriented towards conflating external datasets with existing OSM data. External data is usually polygons (building footprints) or POIs. These days there are multiple publicly available building footprint datasets with an appropriate license for OSM. The problem is that this data needs to be validated.

    Due to the flexibility of the OSM data schema, it's impossible to get 100% perfect conflation, and purely manual conflation is very time-consuming and tedious. This project aims to automate as much of the process as possible so that validators can work as efficiently as possible.

    "},{"location":"api/","title":"API Docs for conflator","text":""},{"location":"api/#conflatorpy","title":"conflator.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default uri str

    URI for the primary database

    None boundary str

    Boundary to limit SQL queries

    None

    Returns:

    Type Description Conflator

    An instance of this object

    Source code in osm_merge/conflator.py
    def __init__(self,\n             uri: str = None,\n             boundary: str = None\n             ):\n    \"\"\"\n    Initialize Input data sources.\n\n    Args:\n        uri (str): URI for the primary database\n        boundary (str, optional): Boundary to limit SQL queries\n\n    Returns:\n        (Conflator): An instance of this object\n    \"\"\"\n    self.postgres = list()\n    self.tags = dict()\n    self.boundary = boundary\n    self.dburi = uri\n    self.primary = None\n    if boundary:\n        infile = open(boundary, 'r')\n        self.boundary = geojson.load(infile)\n        infile.close()\n    # Distance in meters for conflating with postgis\n    self.tolerance = 7\n    self.data = dict()\n    self.analyze = (\"building\", \"name\", \"amenity\", \"landuse\", \"cuisine\", \"tourism\", \"leisure\")\n

    options: show_source: false heading_level: 3
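
    A minimal usage sketch (the database URI and boundary file name are placeholders, assuming the osm_merge package is installed):

        from osm_merge.conflator import Conflator

        # Limit conflation to a project boundary; the boundary file is read
        # as GeoJson by the constructor.
        conflator = Conflator(uri="localhost/osmdata", boundary="boundary.geojson")
        conflator.dump()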

    "},{"location":"api/#osm_merge.conflator.Conflator.getDistance","title":"getDistance","text":"
    getDistance(newdata, olddata)\n

    Compute the distance between two features in meters

    Parameters:

    Name Type Description Default newdata Feature

    A feature from the external dataset

    required olddata Feature

    A feature from the existing OSM dataset

    required

    Returns:

    Type Description float

    The distance between the two features

    Source code in osm_merge/conflator.py
    def getDistance(self,\n        newdata: Feature,\n        olddata: Feature,\n        ) -> float:\n    \"\"\"\n    Compute the distance between two features in meters\n\n    Args:\n        newdata (Feature): A feature from the external dataset\n        olddata (Feature): A feature from the existing OSM dataset\n\n    Returns:\n        (float): The distance between the two features\n    \"\"\"\n    # timer = Timer(text=\"getDistance() took {seconds:.0f}s\")\n    # timer.start()\n    # dist = shapely.hausdorff_distance(center, wkt)\n    dist = float()\n\n    # Transform so the results are in meters instead of degress of the\n    # earth's radius.\n    project = pyproj.Transformer.from_proj(\n        pyproj.Proj(init='epsg:4326'),\n        pyproj.Proj(init='epsg:3857')\n        )\n    newobj = transform(project.transform, shape(newdata[\"geometry\"]))\n    oldobj = transform(project.transform, shape(olddata[\"geometry\"]))\n\n    # FIXME: we shouldn't ever get here...\n    if oldobj.type == \"MultiLineString\":\n        log.error(f\"MultiLineString unsupported!\")\n\n    if newobj.type == \"MultiLineString\":\n        lines = newobj.geoms\n    elif newobj.type == \"GeometryCollection\":\n        lines = newobj.geoms\n    else:\n        lines = MultiLineString([newobj]).geoms\n\n    # dists = list()\n    best = None\n    for segment in lines:\n        if oldobj.geom_type == \"LineString\" and segment.geom_type == \"LineString\":\n            # Compare two highways\n            if oldobj.within(segment):\n                log.debug(f\"CONTAINS\")\n            dist = segment.distance(oldobj)\n        elif oldobj.geom_type == \"Point\" and segment.geom_type == \"LineString\":\n            # We only want to compare LineStrings, so force the distance check\n            # to be False\n            dist = 12345678.9\n        elif oldobj.geom_type == \"Point\" and segment.geom_type == \"Point\":\n            dist = segment.distance(oldobj)\n        elif oldobj.geom_type == \"Polygon\" and segment.geom_type == \"Polygon\":\n            # compare two buildings\n            pass\n        elif oldobj.geom_type == \"Polygon\" and segment.geom_type == \"Point\":\n            # Compare a point with a building, used for ODK Collect data\n            center = shapely.centroid(oldobj)\n            dist = segment.distance(center)\n        elif oldobj.geom_type == \"Point\" and segment.geom_type == \"LineString\":\n            dist = segment.distance(oldobj)\n        elif oldobj.geom_type == \"LineString\" and segment.geom_type == \"Point\":\n            dist = segment.distance(oldobj)\n\n        # Find the closest segment\n        if best is None:\n            best = dist\n        elif dist < best:\n            # log.debug(f\"BEST: {best} < {dist}\")\n            best = dist\n\n    # timer.stop()\n    return best # dist # best\n
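
    A hedged sketch of comparing two highway segments with getDistance(); the coordinates are invented, and it assumes the geojson package's Feature and LineString types used by the source above:

        from geojson import Feature, LineString
        from osm_merge.conflator import Conflator

        conflator = Conflator()
        external = Feature(geometry=LineString([(-105.001, 39.750), (-105.000, 39.752)]))
        osm = Feature(geometry=LineString([(-105.0012, 39.7501), (-105.0002, 39.7521)]))
        # Distance between the closest segments of the two features, in meters.
        dist = conflator.getDistance(external, osm)
        print(dist)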
    "},{"location":"api/#osm_merge.conflator.Conflator.checkTags","title":"checkTags","text":"
    checkTags(extfeat, osm)\n

    Check tags between 2 features.

    Parameters:

    Name Type Description Default extfeat Feature

    The feature from the external dataset

    required osm Feature

    The result of the SQL query

    required

    Returns:

    Type Description int

    The number of tag matches

    dict

    The updated tags

    Source code in osm_merge/conflator.py
    def checkTags(self,\n              extfeat: Feature,\n              osm: Feature,\n               ):\n    \"\"\"\n    Check tags between 2 features.\n\n    Args:\n        extfeat (Feature): The feature from the external dataset\n        osm (Feature): The result of the SQL query\n\n    Returns:\n        (int): The number of tag matches\n        (dict): The updated tags\n    \"\"\"\n    match_threshold = 85\n    match = [\"name\", \"ref\", \"ref:usfs\"]\n    hits = 0\n    props = dict()\n    id = 0\n    version = 0\n    props = extfeat['properties'] | osm['properties']\n    # ODK Collect adds these two tags we don't need.\n    if \"title\" in props:\n        del props[\"title\"]\n    if \"label\" in props:\n        del props[\"label\"]\n\n    if \"id\" in props:\n        # External data not from an OSM source always has\n        # negative IDs to distinguish it from current OSM data.\n        id = int(props[\"id\"])\n    else:\n        id -= 1\n        props[\"id\"] = id\n\n    if \"version\" in props:\n        # Always use the OSM version if it exists, since it gets\n        # incremented so JOSM see it's been modified.\n        props[\"version\"] = int(version)\n        # Name may also be name:en, name:np, etc... There may also be\n        # multiple name:* values in the tags.\n    else:\n        props[\"version\"] = 1\n\n    for key in match:\n        if \"highway\" in osm[\"properties\"]:\n            # Always use the value in the secondary, which is\n            # likely OSM.\n            props[\"highway\"] = osm[\"properties\"][\"highway\"]\n        if key not in props:\n            continue\n\n        # Usually it's the name field that has the most variety in\n        # in trying to match strings. This often is differences in\n        # capitalization, singular vs plural, and typos from using\n        # your phone to enter the name. Course names also change\n        # too so if it isn't a match, use the new name from the\n        # external dataset.\n        if key in osm[\"properties\"] and key in extfeat[\"properties\"]:\n            # Sometimes there will be a word match, which returns a\n            # ratio in the low 80s. In that case they should be\n            # a similar length.\n            length = len(extfeat[\"properties\"][key]) - len(osm[\"properties\"][key])\n            ratio = fuzz.ratio(extfeat[\"properties\"][key].lower(), osm[\"properties\"][key].lower())\n            if ratio > match_threshold and length <= 3:\n                hits += 1\n                props[\"ratio\"] = ratio\n                props[key] = extfeat[\"properties\"][key]\n                if ratio != 100:\n                    # Often the only difference is using FR or FS as the\n                    # prefix. 
In that case, see if the ref matches.\n                    if key[:3] == \"ref\":\n                        # This assume all the data has been converted\n                        # by one of the utility programs, which enfore\n                        # using the ref:usfs tag.\n                        tmp = extfeat[\"properties\"][\"ref:usfs\"].split(' ')\n                        extref = tmp[1].upper()\n                        tmp = osm[\"properties\"][\"ref:usfs\"].split(' ')\n                        newref = tmp[1].upper()\n                        # log.debug(f\"REFS: {extref} vs {newref}: {extref == newref}\")\n                        if extref == newref:\n                            hits += 1\n                            # Many minor changes of FS to FR don't require\n                            # caching the exising value as it's only the\n                            # prefix that changed. It always stayes in this\n                            # range.\n                            if osm[\"properties\"][\"ref:usfs\"][:3] == \"FS \" and ratio > 80 and ratio < 90:\n                                # log.debug(f\"Ignoring old ref {osm[\"properties\"][\"ref:usfs\"]}\")\n                                continue\n                    # For a fuzzy match, cache the value from the\n                    # secondary dataset and use the value in the\n                    # primary dataset.\n                    props[f\"old_{key}\"] = osm[\"properties\"][key]\n\n    # print(props)\n    return hits, props\n
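
    A sketch of comparing tags between an external feature and an OSM feature; the tag values are invented for illustration:

        from geojson import Feature, Point
        from osm_merge.conflator import Conflator

        conflator = Conflator()
        external = Feature(geometry=Point((-105.0, 39.75)),
                           properties={"name": "Piney Road", "ref:usfs": "FR 123.1"})
        osm = Feature(geometry=Point((-105.0, 39.75)),
                      properties={"id": 1234, "version": 2, "highway": "track",
                                  "name": "Piney road", "ref:usfs": "FS 123.1"})
        hits, props = conflator.checkTags(external, osm)
        # hits counts the matching tags; props holds the merged tags, with any
        # fuzzy-matched OSM value cached as old_<key>.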
    "},{"location":"api/#osm_merge.conflator.Conflator.loadFile","title":"loadFile","text":"
    loadFile(osmfile)\n

    Read an OSM XML file and convert it to GeoJson for consistency.

    Parameters:

    Name Type Description Default osmfile str

    The OSM XML file to load

    required

    Returns:

    Type Description list

    The entries in the OSM XML file

    Source code in osm_merge/conflator.py
    def loadFile(\n    self,\n    osmfile: str,\n) -> list:\n    \"\"\"\n    Read a OSM XML file and convert it to GeoJson for consistency.\n\n    Args:\n        osmfile (str): The OSM XML file to load\n\n    Returns:\n        (list): The entries in the OSM XML file\n    \"\"\"\n    alldata = list()\n    size = os.path.getsize(osmfile)\n    with open(osmfile, \"r\") as file:\n        xml = file.read(size)\n        doc = xmltodict.parse(xml)\n        if \"osm\" not in doc:\n            logging.warning(\"No data in this instance\")\n            return False\n        data = doc[\"osm\"]\n        if \"node\" not in data:\n            logging.warning(\"No nodes in this instance\")\n            return False\n\n    nodes = dict()\n    for node in data[\"node\"]:\n        properties = {\n            \"id\": int(node[\"@id\"]),\n        }\n        if \"@version\" not in node:\n            properties[\"version\"] = 1\n        else:\n            properties[\"version\"] = node[\"@version\"]\n\n        if \"@timestamp\" in node:\n            properties[\"timestamp\"] = node[\"@timestamp\"]\n\n        if \"tag\" in node:\n            for tag in node[\"tag\"]:\n                if type(tag) == dict:\n                    # Drop all the TIGER tags based on\n                    # https://wiki.openstreetmap.org/wiki/TIGER_fixup\n                    if tag[\"@k\"] in properties:\n                        if properties[tag[\"@k\"]][:7] == \"tiger:\":\n                            continue\n                    properties[tag[\"@k\"]] = tag[\"@v\"].strip()\n                    # continue\n                else:\n                    properties[node[\"tag\"][\"@k\"]] = node[\"tag\"][\"@v\"].strip()\n                # continue\n        geom = Point((float(node[\"@lon\"]), float(node[\"@lat\"])))\n        # cache the nodes so we can dereference the refs into\n        # coordinates, but we don't need them in GeoJson format.\n        nodes[properties[\"id\"]] = geom\n        if len(properties) > 2:\n            alldata.append(Feature(geometry=geom, properties=properties))\n\n    for way in data[\"way\"]:\n        attrs = dict()\n        properties = {\n            \"id\": int(way[\"@id\"]),\n        }\n        refs = list()\n        if \"nd\" in way:\n            if len(way[\"nd\"]) > 0:\n                for ref in way[\"nd\"]:\n                    refs.append(int(ref[\"@ref\"]))\n            properties[\"refs\"] = refs\n\n        if \"@version\" not in node:\n            properties[\"version\"] = 1\n        else:\n            properties[\"version\"] = node[\"@version\"]\n\n        if \"@timestamp\" in node:\n            attrs[\"timestamp\"] = node[\"@timestamp\"]\n\n        if \"tag\" in way:\n            for tag in way[\"tag\"]:\n                if type(tag) == dict:\n                    properties[tag[\"@k\"]] = tag[\"@v\"].strip()\n                    # continue\n                else:\n                    properties[way[\"tag\"][\"@k\"]] = way[\"tag\"][\"@v\"].strip()\n                # continue\n        # geom =\n        tmp = list()\n        for ref in refs:\n            tmp.append(nodes[ref]['coordinates'])\n        geom = LineString(tmp)\n        if geom is None:\n            breakpoint()\n        log.debug(f\"WAY: {properties}\")\n        alldata.append(Feature(geometry=geom, properties=properties))\n\n    return alldata\n
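
    A sketch of loading an OSM XML extract (the file name is a placeholder):

        from osm_merge.conflator import Conflator

        conflator = Conflator()
        # Returns a list of GeoJson Features: Points for tagged nodes and
        # LineStrings for ways, with the way's node IDs kept in a "refs" property.
        features = conflator.loadFile("highways.osm")
        print(f"{len(features)} features loaded")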
    "},{"location":"api/#osm_merge.conflator.Conflator.initInputDB","title":"initInputDB async","text":"
    initInputDB(config=None, dburi=None)\n

    When running async, we can't initialize the async database connection in the constructor, so it has to be done as an extra step.

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    None

    Returns: (bool): Whether it initialized

    Source code in osm_merge/conflator.py
    async def initInputDB(self,\n                    config: str = None,\n                    dburi: str = None,\n                    ) -> bool:\n    \"\"\"\n    When async, we can't initialize the async database connection,\n    so it has to be done as an extrat step.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n    Returns:\n        (bool): Whether it initialiized\n    \"\"\"\n    db = GeoSupport(dburi, config)\n    await db.initialize()\n    self.postgres.append(db)\n\n    return True\n
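
    Since this is a coroutine it has to be awaited; a sketch with a placeholder database URI:

        import asyncio
        from osm_merge.conflator import Conflator

        async def main():
            conflator = Conflator()
            # The optional config file comes from the osm-rawdata project.
            await conflator.initInputDB(dburi="localhost/osmdata")

        asyncio.run(main())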
    "},{"location":"api/#osm_merge.conflator.Conflator.initOutputDB","title":"initOutputDB async","text":"
    initOutputDB(dburi=None)\n

    When running async, we can't initialize the async database connection in the constructor, so it has to be done as an extra step.

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    required Source code in osm_merge/conflator.py
    async def initOutputDB(self,\n                    dburi: str = None,\n                    ):\n    \"\"\"\n    When async, we can't initialize the async database connection,\n    so it has to be done as an extrat step.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n    \"\"\"\n    if dburi:\n        self.dburi = dburi\n        await self.createDBThreads(dburi, config)\n    elif self.dburi:\n        await self.createDBThreads(self.dburi, config)\n
    "},{"location":"api/#osm_merge.conflator.Conflator.createDBThreads","title":"createDBThreads async","text":"
    createDBThreads(uri=None, config=None, execs=cores)\n

    Create threads for writing to the primary database to avoid corrupting data.

    Parameters:

    Name Type Description Default uri str

    URI for the primary database

    None config str

    The config file from the osm-rawdata project

    None threads int

    The number of threads to create

    required

    Returns:

    Type Description bool

    Whether the threads were created successfully

    Source code in osm_merge/conflator.py
    async def createDBThreads(self,\n                    uri: str = None,\n                    config: str = None,\n                    execs: int = cores,\n                    ) -> bool:\n    \"\"\"\n    Create threads for writting to the primary datatbase to avoid\n    problems with corrupting data.\n\n    Args:\n        uri (str): URI for the primary database\n        config (str, optional): The config file from the osm-rawdata project\n        threads (int, optional): The number of threads to create\n\n    Returns:\n        (bool): Whether the threads were created sucessfully\n    \"\"\"\n    # Each thread needs it's own connection to postgres to avoid problems\n    # when inserting or updating the primary database.\n    if uri:\n        for thread in range(0, execs + 1):\n            db = GeoSupport(uri)\n            await db.initialize(uri, config)\n            if not db:\n                return False\n            self.postgres.append(db)\n        if self.boundary:\n            if 'features' in self.boundary:\n                poly = self.boundary[\"features\"][0][\"geometry\"]\n            else:\n                poly = shape(self.boundary['geometry'])\n\n            # FIXME: we only need to clip once to create the view, this is not\n            # confirmed yet.\n            await db.clipDB(poly, self.postgres[0])\n\n        return True\n
    "},{"location":"api/#osm_merge.conflator.Conflator.conflateData","title":"conflateData async","text":"
    conflateData(odkspec, osmspec, threshold=3.0)\n

    Open the two source files and conflate them.

    Parameters:

    Name Type Description Default odkspec str

    The external data uri

    required osmspec str

    The existing OSM data uri

    required threshold float

    Threshold for distance calculations in meters

    3.0

    Returns:

    Type Description list

    The conflated output

    Source code in osm_merge/conflator.py
    async def conflateData(self,\n                odkspec: str,\n                osmspec: str,\n                threshold: float = 3.0,\n                ) -> list:\n    \"\"\"\n    Open the two source files and contlate them.\n\n    Args:\n        odkspec (str): The external data uri\n        osmspec (str): The existing OSM data uri\n        threshold (float): Threshold for distance calculations in meters\n\n    Returns:\n        (list):  The conflated output\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n    odkdata = list()\n    osmdata = list()\n\n    result = list()\n    if odkspec[:3].lower() == \"pg:\":\n        db = GeoSupport(odkspec[3:])\n        result = await db.queryDB()\n    else:\n        odkdata = self.parseFile(odkspec)\n\n    if osmspec[:3].lower() == \"pg:\":\n        db = GeoSupport(osmspec[3:])\n        result = await db.queryDB()\n    else:\n        osmdata = self.parseFile(osmspec)\n\n    entries = len(odkdata)\n    chunk = round(entries / cores)\n\n    alldata = list()\n    tasks = list()\n\n    log.info(f\"The primary dataset has {len(odkdata)} entries\")\n    log.info(f\"The secondary dataset has {len(osmdata)} entries\")\n\n    # Make threading optional for easier debugging\n    single = False\n\n    if single:\n        alldata = conflateThread(odkdata, osmdata)\n    else:\n        futures = list()\n        with concurrent.futures.ProcessPoolExecutor(max_workers=cores) as executor:\n            for block in range(0, entries, chunk):\n                future = executor.submit(conflateThread,\n                        odkdata[block:block + chunk - 1],\n                        osmdata\n                        )\n                futures.append(future)\n            #for thread in concurrent.futures.wait(futures, return_when='ALL_COMPLETED'):\n            for future in concurrent.futures.as_completed(futures):\n                log.debug(f\"Waiting for thread to complete..\")\n                alldata += future.result()\n\n        executor.shutdown()\n\n    timer.stop()\n\n    return alldata\n
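
    A sketch of a complete conflation run between an external dataset and an OSM extract, writing the results with the output methods documented below; the file names are placeholders:

        import asyncio
        from osm_merge.conflator import Conflator

        async def main():
            conflator = Conflator()
            # Conflate the external dataset against the OSM extract, treating
            # features within 3 meters of each other as candidates.
            features = await conflator.conflateData("mvum.geojson", "osm_highways.osm", threshold=3.0)
            conflator.writeGeoJson(features, "conflated.geojson")
            conflator.writeOSM(features, "conflated.osm")

        asyncio.run(main())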
    "},{"location":"api/#osm_merge.conflator.Conflator.dump","title":"dump","text":"
    dump()\n

    Dump internal data for debugging.

    Source code in osm_merge/conflator.py
    def dump(self):\n    \"\"\"\n    Dump internal data for debugging.\n    \"\"\"\n    print(f\"Data source is: {self.dburi}\")\n    print(f\"There are {len(self.data)} existing features\")\n
    "},{"location":"api/#osm_merge.conflator.Conflator.parseFile","title":"parseFile","text":"
    parseFile(filespec)\n

    Parse the input file based on its format.

    Parameters:

    Name Type Description Default filespec str

    The file to parse

    required

    Returns:

    Type Description list

    The parsed data from the file

    Source code in osm_merge/conflator.py
    def parseFile(self,\n            filespec: str,\n            ) ->list:\n    \"\"\"\n    Parse the input file based on it's format.\n\n    Args:\n        filespec (str): The file to parse\n\n    Returns:\n        (list): The parsed data from the file\n    \"\"\"\n    odkpath = Path(filespec)\n    odkdata = list()\n    if odkpath.suffix == '.geojson':\n        # FIXME: This should also work for any GeoJson file, not\n        # only ODK ones, but this has yet to be tested.\n        log.debug(f\"Parsing GeoJson files {odkpath}\")\n        odkfile = open(odkpath, 'r')\n        features = geojson.load(odkfile)\n        odkdata = features['features']\n    elif odkpath.suffix == '.osm':\n        log.debug(f\"Parsing OSM XML files {odkpath}\")\n        osmfile = OsmFile()\n        odkdata = self.loadFile(odkpath)\n    elif odkpath.suffix == \".csv\":\n        log.debug(f\"Parsing csv files {odkpath}\")\n        odk = ODKParsers()\n        for entry in odk.CSVparser(odkpath):\n            odkdata.append(odk.createEntry(entry))\n    elif odkpath.suffix == \".json\":\n        log.debug(f\"Parsing json files {odkpath}\")\n        odk = ODKParsers()\n        for entry in odk.JSONparser(odkpath):\n            odkdata.append(odk.createEntry(entry))\n    return odkdata\n
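
    parseFile() dispatches on the file extension; a sketch with placeholder file names:

        from osm_merge.conflator import Conflator

        conflator = Conflator()
        buildings = conflator.parseFile("buildings.geojson")   # GeoJson
        highways = conflator.parseFile("extract.osm")          # OSM XML, via loadFile()
        submissions = conflator.parseFile("submissions.csv")   # ODK Collect CSV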
    "},{"location":"api/#osm_merge.conflator.Conflator.conflateDB","title":"conflateDB","text":"
    conflateDB(source)\n

    Conflate all the data. This is the primary interface for conflation.

    Parameters:

    Name Type Description Default source str

    The source file to conflate

    required

    Returns:

    Type Description dict

    The conflated features

    Source code in osm_merge/conflator.py
    def conflateDB(self,\n                 source: str,\n                 ) -> dict:\n    \"\"\"\n    Conflate all the data. This the primary interfacte for conflation.\n\n    Args:\n        source (str): The source file to conflate\n\n    Returns:\n        (dict):  The conflated features\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n\n    log.info(\"Opening data file: %s\" % source)\n    toplevel = Path(source)\n    if toplevel.suffix == \".geosjon\":\n        src = open(source, \"r\")\n        self.data = geojson.load(src)\n    elif toplevel.suffix == \".osm\":\n        src = open(source, \"r\")\n        osmin = OsmFile()\n        self.data = osmin.loadFile(source) # input file\n        if self.boundary:\n            gs = GeoSupport(source)\n            # self.data = gs.clipFile(self.data)\n\n    # Use fuzzy string matching to handle minor issues in the name column,\n    # which is often used to match an amenity.\n    if len(self.data) == 0:\n        self.postgres[0].query(\"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch\")\n    # log.debug(f\"OdkMerge::conflateData() called! {len(odkdata)} features\")\n\n    # A chunk is a group of threads\n    chunk = round(len(self.data) / cores)\n\n    # cycle = range(0, len(odkdata), chunk)\n\n    # Chop the data into a subset for each thread\n    newdata = list()\n    future = None\n    result = None\n    index = 0\n    if True:                # DEBUGGING HACK ALERT!\n        result = conflateThread(self.data, self, index)\n        return dict()\n\n    with concurrent.futures.ThreadPoolExecutor(max_workers=cores) as executor:\n        i = 0\n        subset = dict()\n        futures = list()\n        for key, value in self.data.items():\n            subset[key] = value\n            if i == chunk:\n                i = 0\n                result = executor.submit(conflateThread, subset, self, index)\n                index += 1\n                # result.add_done_callback(callback)\n                futures.append(result)\n                subset = dict()\n            i += 1\n        for future in concurrent.futures.as_completed(futures):\n        # # for future in concurrent.futures.wait(futures, return_when='ALL_COMPLETED'):\n            log.debug(f\"Waiting for thread to complete..\")\n            # print(f\"YYEESS!! {future.result(timeout=10)}\")\n            newdata.append(future.result(timeout=5))\n    timer.stop()\n    return newdata\n
    "},{"location":"api/#osm_merge.conflator.Conflator.writeOSM","title":"writeOSM","text":"
    writeOSM(data, filespec)\n

    Write the data to an OSM XML file.

    Parameters:

    Name Type Description Default data list

    The list of GeoJson features

    required filespec str

    The output file name

    required Source code in osm_merge/conflator.py
    def writeOSM(self,\n             data: list,\n             filespec: str,\n             ):\n    \"\"\"\n    Write the data to an OSM XML file.\n\n    Args:\n        data (list): The list of GeoJson features\n        filespec (str): The output file name\n    \"\"\"\n    osm = OsmFile(filespec)\n    negid = -100\n    id = -1\n    out = str()\n    for entry in data:\n        version = 1\n        tags = entry[\"properties\"]\n        if \"osm_id\" in tags:\n            id = tags[\"osm_id\"]\n        elif \"id\" in tags:\n            id = tags[\"id\"]\n        elif \"id\" not in tags:\n            # There is no id or version for non OSM features\n            id -= 1\n        if \"version\" in entry[\"properties\"]:\n            version = int(entry[\"properties\"][\"version\"])\n            version += 1\n        if id == 814085818:\n            breakpoint()\n        attrs = {\"id\": id, \"version\": version}\n        # These are OSM attributes, not tags\n        if \"id\" in tags:\n            del tags[\"id\"]\n        if \"version\" in tags:\n            del tags[\"version\"]\n        item = {\"attrs\": attrs, \"tags\": tags}\n        # if entry[\"geometry\"][\"type\"] == \"LineString\" or entry[\"geometry\"][\"type\"] == \"Polygon\":\n        # print(entry)\n        out = str()\n        if entry[\"geometry\"] is not None and entry[\"geometry\"][\"type\"] == \"Point\":\n            # It's a node referenced by a way\n            item[\"attrs\"][\"lon\"] = entry[\"geometry\"][\"coordinates\"][0]\n            item[\"attrs\"][\"lat\"] = entry[\"geometry\"][\"coordinates\"][1]\n            if \"timestamp\" in item[\"tags\"]:\n                item[\"attrs\"][\"timestamp\"] = item[\"tags\"][\"timestamp\"]\n                del item[\"tags\"][\"timestamp\"]\n            # referenced nodes should have no tags\n            del item[\"tags\"]\n            # FIXME: determine if we need to write nodes\n            # out = osm.createNode(item, False)\n            continue\n        else:\n            # OSM ways don't have a geometry, just references to node IDs.\n            # The OSM XML file won't have any nodes, so at first won't\n            # display in JOSM until you do a File->\"Update modified\",\n            if \"refs\" not in tags:\n                log.error(f\"No Refs: {tags}\")\n                continue\n                # breakpoint()\n            if len(tags['refs']) > 0:\n                if type(tags[\"refs\"]) != list:\n                    item[\"refs\"] = eval(tags[\"refs\"])\n                else:\n                    item[\"refs\"] = tags[\"refs\"]\n                del tags[\"refs\"]\n                out = osm.createWay(item, True)\n        if len(out) > 0:\n            osm.write(out)\n
    "},{"location":"api/#osm_merge.conflator.Conflator.writeGeoJson","title":"writeGeoJson","text":"
    writeGeoJson(data, filespec)\n

    Write the data to a GeoJson file.

    Parameters:

    Name Type Description Default data dict

    The list of GeoJson features

    required filespec str

    The output file name

    required Source code in osm_merge/conflator.py
    def writeGeoJson(self,\n             data: dict,\n             filespec: str,\n             ):\n    \"\"\"\n    Write the data to a GeoJson file.\n\n    Args:\n        data (dict): The list of GeoJson features\n        filespec (str): The output file name\n    \"\"\"\n    file = open(filespec, \"w\")\n    fc = FeatureCollection(data)\n    geojson.dump(fc, file, indent=4)\n
    "},{"location":"api/#osm_merge.conflator.Conflator.osmToFeature","title":"osmToFeature","text":"
    osmToFeature(osm)\n

    Convert an entry from an OSM XML file with attrs and tags into a GeoJson Feature.

    Parameters:

    Name Type Description Default osm dict

    The OSM entry

    required

    Returns:

    Type Description Feature

    A GeoJson feature

    Source code in osm_merge/conflator.py
    def osmToFeature(self,\n                 osm: dict(),\n                 ) -> Feature:\n    \"\"\"\n    Convert an entry from an OSM XML file with attrs and tags into\n    a GeoJson Feature.\n\n    Args:\n        osm (dict): The OSM entry\n\n    Returns:\n        (Feature): A GeoJson feature\n    \"\"\"\n    if \"attrs\" not in osm:\n        return Feature(geometry=shape(osm[\"geometry\"]), properties=osm[\"properties\"])\n\n    if \"osm_id\" in osm[\"attrs\"]:\n        id = osm[\"attrs\"][\"osm_id\"]\n    elif \"id\" in osm[\"attrs\"]:\n        id = osm[\"attrs\"][\"id\"]\n    props = {\"id\": id}\n    if \"version\" in osm[\"attrs\"]:\n        props[\"version\"] = osm[\"attrs\"][\"version\"]\n\n    props.update(osm[\"tags\"])\n    # It's a way, so no coordinate\n    if \"refs\" in osm:\n        return Feature(properties=props)\n    else:\n        geom = Point((float(osm[\"attrs\"][\"lon\"]), float(osm[\"attrs\"][\"lat\"])))\n\n        return Feature(geometry=geom, properties=props)\n
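
    A sketch of converting a parsed OSM entry (the attrs/tags structure produced by OsmFile) into a GeoJson Feature; the values are invented:

        from osm_merge.conflator import Conflator

        conflator = Conflator()
        entry = {
            "attrs": {"id": 1234, "version": 2, "lat": "39.75", "lon": "-105.0"},
            "tags": {"amenity": "cafe", "name": "Example Cafe"},
        }
        feature = conflator.osmToFeature(entry)
        # feature["properties"] now holds the id, the version, and the OSM tags.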
    "},{"location":"api/#conflatebuildingspy","title":"conflateBuildings.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default dburi str

    The DB URI

    None boundary Polygon

    The AOI of the project

    None

    Returns:

    Type Description ConflateDB

    An instance of this object

    Source code in osm_merge/conflateBuildings.py
    def __init__(\n    self,\n    dburi: str = None,\n    boundary: Polygon = None,\n):\n    \"\"\"This class conflates data that has been imported into a postgres\n    database using the Underpass raw data schema.\n\n    Args:\n        dburi (str): The DB URI\n        boundary (Polygon): The AOI of the project\n\n    Returns:\n        (ConflateDB): An instance of this object\n    \"\"\"\n    self.postgres = list()\n    self.uri = None\n    if dburi:\n        self.uri = uriParser(dburi)\n        self.db = GeoSupport(dburi)\n    self.boundary = boundary\n    self.view = \"ways_poly\"\n    self.filter = list()\n

    options: show_source: false heading_level: 3

    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.addSourceFilter","title":"addSourceFilter","text":"
    addSourceFilter(source)\n

    Add to the list of suspected bad source datasets

    Source code in osm_merge/conflateBuildings.py
    def addSourceFilter(\n    self,\n    source: str,\n):\n    \"\"\"Add to a list of suspect bad source datasets\"\"\"\n    self.filter.append(source)\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.overlapDB","title":"overlapDB","text":"
    overlapDB(dburi)\n

    Conflate buildings where all the data is in the same postgres database using the Underpass raw data schema.

    Parameters:

    Name Type Description Default dburi str

    The URI for the existing OSM data

    required

    This is not fast for large areas!

    Source code in osm_merge/conflateBuildings.py
    def overlapDB(\n    self,\n    dburi: str,\n):\n    \"\"\"Conflate buildings where all the data is in the same postgres database\n    using the Underpass raw data schema.\n\n    Args:\n        dburi (str): The URI for the existing OSM data\n\n    This is not fast for large areas!\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n    # Find duplicate buildings in the same database\n    # sql = f\"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)\"\n    # sql = \"SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1\"\n    # Make a new postgres VIEW of all overlapping or touching buildings\n    # log.info(f\"Looking for overlapping buildings in \\\"{self.uri['dbname']}\\\", this make take awhile...\")\n    # print(sql)\n    # Views must be dropped in the right order\n    sql = (\n        \"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;\"\n    )\n    result = self.db.queryDB(sql)\n\n    if self.boundary:\n        self.db.clipDB(self.boundary)\n\n    log.debug(\"Clipping OSM database\")\n    ewkt = shape(self.boundary)\n    uri = uriParser(dburi)\n    log.debug(f\"Extracting OSM subset from \\\"{uri['dbname']}\\\"\")\n    sql = f\"CREATE TABLE osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom) AND tags->>'building' IS NOT NULL\"\n    # print(sql)\n    result = self.db.queryDB(sql)\n\n    sql = \"CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL\"\n    print(sql)\n    result = self.db.queryDB(sql)\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.cleanDuplicates","title":"cleanDuplicates","text":"
    cleanDuplicates()\n

    Delete the entries from the duplicate building view.

    Returns:

    Type Description FeatureCollection

    The entries from the database table

    Source code in osm_merge/conflateBuildings.py
    def cleanDuplicates(self):\n    \"\"\"Delete the entries from the duplicate building view.\n\n    Returns:\n        (FeatureCollection): The entries from the datbase table\n    \"\"\"\n    log.debug(\"Removing duplicate buildings from ways_view\")\n    sql = \"DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)\"\n\n    result = self.db.queryDB(sql)\n    return True\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.getNew","title":"getNew","text":"
    getNew()\n

    Get only the new buildings

    Returns:

    Type Description FeatureCollection

    The entries from the database table

    Source code in osm_merge/conflateBuildings.py
    def getNew(self):\n    \"\"\"Get only the new buildings\n\n    Returns:\n        (FeatureCollection): The entries from the datbase table\n    \"\"\"\n    sql = \"SELECT osm_id,geom,tags FROM ways_view\"\n    result = self.db.queryDB(sql)\n    features = list()\n    for item in result:\n        # log.debug(item)\n        entry = {\"osm_id\": item[0]}\n        entry.update(item[2])\n        geom = wkb.loads(item[1])\n        features.append(Feature(geometry=geom, properties=entry))\n\n    log.debug(f\"{len(features)} new features found\")\n    return FeatureCollection(features)\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.findHighway","title":"findHighway","text":"
    findHighway(feature)\n

    Find the nearest highway to a feature

    Parameters:

    Name Type Description Default feature Feature

    The feature to check against

    required Source code in osm_merge/conflateBuildings.py
    def findHighway(\n    self,\n    feature: Feature,\n):\n    \"\"\"Find the nearest highway to a feature\n\n    Args:\n        feature (Feature): The feature to check against\n    \"\"\"\n    pass\n
    "},{"location":"api/#osm_merge.conflateBuildings.ConflateBuildings.getDuplicates","title":"getDuplicates","text":"
    getDuplicates()\n

    Get the entries from the duplicate building view.

    Returns:

    Type Description FeatureCollection

    The entries from the database table

    Source code in osm_merge/conflateBuildings.py
    def getDuplicates(self):\n    \"\"\"Get the entries from the duplicate building view.\n\n    Returns:\n        (FeatureCollection): The entries from the datbase table\n    \"\"\"\n    sql = \"SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view\"\n    result = self.db.queryDB(sql)\n    features = list()\n    for item in result:\n        # log.debug(item)\n        # First building identified\n        entry = {\"area\": float(item[0]), \"id\": int(item[1])}\n        geom = wkb.loads(item[2])\n        entry.update(item[3])\n        features.append(Feature(geometry=geom, properties=entry))\n\n        # Second building identified\n        entry = {\"area\": float(item[0]), \"id\": int(item[4])}\n        entry[\"id\"] = int(item[4])\n        geom = wkb.loads(item[5])\n        entry.update(item[6])\n        # FIXME: Merge the tags from the buildings into the OSM feature\n        # entry.update(item[3])\n        features.append(Feature(geometry=geom, properties=entry))\n\n    log.debug(f\"{len(features)} duplicate features found\")\n    return FeatureCollection(features)\n
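
    Putting the ConflateBuildings methods above together, a hedged workflow sketch; the database URIs, boundary file, and source filter value are placeholders:

        import geojson
        from shapely.geometry import shape
        from osm_merge.conflateBuildings import ConflateBuildings

        with open("aoi.geojson") as file:
            boundary = shape(geojson.load(file)["features"][0]["geometry"])

        cdb = ConflateBuildings(dburi="localhost/footprints", boundary=boundary)
        cdb.addSourceFilter("suspect_imagery")      # optionally flag a bad source
        cdb.overlapDB("localhost/osmdata")          # find overlaps with existing OSM buildings
        duplicates = cdb.getDuplicates()
        cdb.cleanDuplicates()
        new_buildings = cdb.getNew()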
    "},{"location":"api/#conflatepoipy","title":"conflatePOI.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default dburi str

    The DB URI

    None boundary Polygon

    The AOI of the project

    None threshold int

    The distance in meters for distance calculations

    7

    Returns:

    Type Description ConflatePOI

    An instance of this object

    Source code in osm_merge/conflatePOI.py
    def __init__(self,\n             dburi: str = None,\n             boundary: Polygon = None,\n             threshold: int = 7,\n             ):\n    \"\"\"\n    This class conflates data that has been imported into a postgres\n    database using the Underpass raw data schema.\n\n    Args:\n        dburi (str): The DB URI\n        boundary (Polygon): The AOI of the project\n        threshold (int): The distance in meters for distance calculations\n\n    Returns:\n        (ConflatePOI): An instance of this object\n    \"\"\"\n    self.data = dict()\n    self.db = None\n    self.tolerance = threshold # Distance in meters for conflating with postgis\n    self.boundary = boundary\n    # Use a common select so it's consistent when parsing results\n    self.select = \"SELECT osm_id,tags,version,ST_AsText(geom),ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;%s\\'))\"\n    if dburi:\n        # for thread in range(0, cores + 1):\n        self.db = GeoSupport(dburi)\n        # self.db.append(db)\n        # We only need to clip the database into a new table once\n        if boundary:\n            self.db.clipDB(boundary, self.db.db)\n            self.db.clipDB(boundary, self.db.db, \"nodes_view\", \"nodes\")\n

    options: show_source: false heading_level: 3
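
    A sketch of creating the POI conflator against a clipped database; the URI and boundary file are placeholders:

        import geojson
        from shapely.geometry import shape
        from osm_merge.conflatePOI import ConflatePOI

        with open("aoi.geojson") as file:
            boundary = shape(geojson.load(file)["features"][0]["geometry"])

        # Conflate POIs against features within 7 meters in the database.
        poi = ConflatePOI(dburi="localhost/osmdata", boundary=boundary, threshold=7)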

    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.overlaps","title":"overlaps","text":"
    overlaps(feature)\n

    Conflate a POI against all the features in a GeoJson file

    Parameters:

    Name Type Description Default feature dict

    The feature to conflate

    required

    Returns:

    Type Description dict

    The modified feature

    Source code in osm_merge/conflatePOI.py
    def overlaps(self,\n            feature: dict,\n            ):\n    \"\"\"\n    Conflate a POI against all the features in a GeoJson file\n\n    Args:\n        feature (dict): The feature to conflate\n\n    Returns:\n        (dict):  The modified feature\n    \"\"\"\n    # Most smartphone GPS are 5-10m off most of the time, plus sometimes\n    # we're standing in front of an amenity and recording that location\n    # instead of in the building.\n    gps_accuracy = 10\n    # this is the treshold for fuzzy string matching\n    match_threshold = 80\n    # log.debug(f\"conflateFile({feature})\")\n    hits = False\n    data = dict()\n    geom = Point((float(feature[\"attrs\"][\"lon\"]), float(feature[\"attrs\"][\"lat\"])))\n    wkt = shape(geom)\n    for existing in self.data['features']:\n        id = int(existing['properties']['id'])\n        entry = shapely.from_geojson(str(existing))\n        if entry.geom_type != 'Point':\n            center = shapely.centroid(entry)\n        else:\n            center = entry\n            # dist = shapely.hausdorff_distance(center, wkt)\n            # if 'name' in existing['properties']:\n            #     print(f\"DIST1: {dist}, {existing['properties']['name']}\")\n        # x = shapely.distance(wkt, entry)\n        # haversine reverses the order of lat & lon from what shapely uses. We\n        # use this as meters is easier to deal with than cartesian coordinates.\n        x1 = (center.coords[0][1], center.coords[0][0])\n        x2 = (wkt.coords[0][1], wkt.coords[0][0])\n        dist = haversine(x1, x2, unit=Unit.METERS)\n        if dist < gps_accuracy:\n            # if 'name' in existing['properties']:\n            # log.debug(f\"DIST2: {dist}\")\n            # log.debug(f\"Got a Hit! {feature['tags']['name']}\")\n            for key,value in feature['tags'].items():\n                if key in self.analyze:\n                    if key in existing['properties']:\n                        result = fuzz.ratio(value, existing['properties'][key])\n                        if result > match_threshold:\n                            # log.debug(f\"Matched: {result}: {feature['tags']['name']}\")\n                            existing['properties']['fixme'] = \"Probably a duplicate!\"\n                            log.debug(f\"Got a dup in file!!! {existing['properties']['name'] }\")\n                            hits = True\n                            break\n        if hits:\n            version = int(existing['properties']['version'])\n            # coords = feature['geometry']['coordinates']\n            # lat = coords[1]\n            # lon = coords[0]\n            attrs = {'id': id, 'version': version, 'lat': feature['attrs']['lat'], 'lon': feature['attrs']['lon']}\n            tags = existing['properties']\n            tags['fixme'] = \"Probably a duplicate!\"\n            # Data extracts for ODK Collect\n            del tags['title']\n            del tags['label']\n            if 'building' in tags:\n                return {'attrs': attrs, 'tags': tags, 'refs': list()}\n            return {'attrs': attrs, 'tags': tags}\n    return dict()\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.queryToFeature","title":"queryToFeature","text":"
    queryToFeature(results)\n

    Convert the results of an SQL query to a GeoJson Feature

    Parameters:

    Name Type Description Default results list

    The results of the query

    required

    Returns:

    Type Description list

    a list of the features from the results

    Source code in osm_merge/conflatePOI.py
    def queryToFeature(self,\n                   results: list,\n                   ):\n    \"\"\"\n    Convert the results of an SQL to a GeoJson Feature\n\n    Args:\n        results (list): The results of the query\n\n    Returns:\n        (list): a list of the features fromn the results\n    \"\"\"\n\n    features = list()\n    for entry in results:\n        osm_id = int(entry[0])\n        tags = entry[1]\n        version = int(entry[2])\n        coords = shapely.from_wkt(entry[3])\n        dist = entry[4]\n        # ways have an additional column\n        if len(entry) == 6:\n            refs = entry[5]\n        else:\n            refs = list()\n        if coords.geom_type == 'Polygon':\n            center = shapely.centroid(coords)\n            lat = center.y\n            lon = center.x\n            tags['geom_type'] = 'way'\n        elif coords.geom_type == \"Point\":\n            lat = coords.y\n            lon = coords.x\n            tags['geom_type'] = 'node'\n        else:\n            log.error(f\"Unsupported geometry type: {coords.geom_type}\")\n        # match = entry[5] # FIXME: for debugging\n        # the timestamp attribute gets added when it's uploaded to OSM.\n        attrs = {'id': osm_id,\n                'version': version,\n                'lat': lat,\n                'lon': lon,\n                }\n        tags['dist'] = dist\n        # tags['match'] = match # FIXME: for debugging\n        # tags['fixme'] = \"Probably a duplicate node!\"\n        features.append({'attrs': attrs, 'tags': tags, 'refs': refs})\n\n    return features\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.checkTags","title":"checkTags","text":"
    checkTags(feature, osm)\n

    Check tags between 2 features.

    Parameters:

    Name Type Description Default feature Feature

    The feature from the external dataset

    required osm dict

    The result of the SQL query

    required

    Returns:

    Type Description int

    The number of tag matches

    dict

    The updated tags

    Source code in osm_merge/conflatePOI.py
    def checkTags(self,\n              feature: Feature,\n              osm: dict,\n              ):\n    \"\"\"\n    Check tags between 2 features.\n\n    Args:\n        feature (Feature): The feature from the external dataset\n        osm (dict): The result of the SQL query\n\n    Returns:\n        (int): The nunber of tag matches\n        (dict): The updated tags\n    \"\"\"\n    tags = osm['tags']\n    hits = 0\n    match_threshold = 80\n    if osm['tags']['dist'] > float(self.tolerance):\n        return 0, osm['tags']\n    for key, value in feature['tags'].items():\n        if key in tags:\n            ratio = fuzz.ratio(value, tags[key])\n            if ratio > match_threshold:\n                hits += 1\n            else:\n                if key != 'note':\n                    tags[f'old_{key}'] = value\n        tags[key] = value\n\n    return hits, tags\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.conflateData","title":"conflateData","text":"
    conflateData(data, threshold=7)\n

    Conflate all the data. This is the primary interface for conflation.

    Parameters:

    Name Type Description Default data list

    A list of all the entries in the OSM XML input file

    required threshold int

    The threshold for distance calculations

    7

    Returns:

    Type Description dict

    The modified features

    Source code in osm_merge/conflatePOI.py
    def conflateData(self,\n                 data: list,\n                 threshold: int = 7,\n                 ):\n    \"\"\"\n    Conflate all the data. This the primary interfacte for conflation.\n\n    Args:\n        data (list): A list of all the entries in the OSM XML input file\n        threshold (int): The threshold for distance calculations\n\n    Returns:\n        (dict):  The modified features\n    \"\"\"\n    timer = Timer(text=\"conflateData() took {seconds:.0f}s\")\n    timer.start()\n    # Use fuzzy string matching to handle minor issues in the name column,\n    # which is often used to match an amenity.\n    if len(self.data) == 0:\n        self.db.queryDB(\"CREATE EXTENSION IF NOT EXISTS fuzzystrmatch\")\n    log.debug(f\"conflateData() called! {len(data)} features\")\n\n    # A chunk is a group of threads\n    entries = len(data)\n    chunk = round(len(data) / cores)\n\n    if True: # FIXME: entries <= chunk:\n        result = conflateThread(data, self)\n        timer.stop()\n        return result\n\n    # Chop the data into a subset for each thread\n    newdata = list()\n    future = None\n    result = None\n    index = 0\n    with concurrent.futures.ThreadPoolExecutor(max_workers=cores) as executor:\n        i = 0\n        subset = dict()\n        futures = list()\n        for key, value in data.items():\n            subset[key] = value\n            if i == chunk:\n                i = 0\n                result = executor.submit(conflateThread, subset, self)\n                index += 1\n                # result.add_done_callback(callback)\n                futures.append(result)\n                subset = dict()\n            i += 1\n        for future in concurrent.futures.as_completed(futures):\n            log.debug(f\"Waiting for thread to complete..\")\n            # print(f\"YYEESS!! {future.result(timeout=10)}\")\n            newdata.append(future.result(timeout=5))\n    timer.stop()\n    return newdata\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.queryWays","title":"queryWays","text":"
    queryWays(feature, db=None)\n

    Conflate a POI against all the ways in a postgres view

    Parameters:

    Name Type Description Default feature Feature

    The feature to conflate

    required db GeoSupport

    The database connection to use

    None

    Returns:

    Type Description list

    The data with tags added from the conflation

    Source code in osm_merge/conflatePOI.py
        def queryWays(self,\n                    feature: Feature,\n                    db: GeoSupport = None,\n                    ):\n        \"\"\"\n        Conflate a POI against all the ways in a postgres view\n\n        Args:\n            feature (Feature): The feature to conflate\n            db (GeoSupport): The datbase connection to use\n\n        Returns:\n            (list): The data with tags added from the conflation\n        \"\"\"\n        # log.debug(f\"conflateWay({feature})\")\n        hits = 0\n        result = list()\n        geom = Point((float(feature[\"attrs\"][\"lon\"]), float(feature[\"attrs\"][\"lat\"])))\n        wkt = shape(geom)\n\n        # cleanval = escape(value)\n        # Get all ways close to this feature.\n#        query = f\"SELECT osm_id,tags,version,ST_AsText(ST_Centroid(geom)),ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) FROM ways_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} ORDER BY ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\'))\"\n        query = f\"{self.select}\" % wkt.wkt\n        query += f\", refs FROM ways_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} ORDER BY ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\'))\"\n        #log.debug(query)\n        result = list()\n        if db:\n            result = db.queryDB(query)\n        else:\n            result = self.db.queryDB(query)\n        if len(result) > 0:\n            hits += 1\n        else:\n            log.warning(f\"No results at all for {query}\")\n\n        return result\n
    "},{"location":"api/#osm_merge.conflatePOI.ConflatePOI.queryNodes","title":"queryNodes","text":"
    queryNodes(feature, db=None)\n

    Find all the nodes in the view within a certain distance that are buildings or amenities.

    Parameters:

    Name Type Description Default feature Feature

    The feature to use as the location

    required db GeoSupport

    The database connection to use

    None

    Returns:

    Type Description list

    The results of the conflation

    Source code in osm_merge/conflatePOI.py
    def queryNodes(self,\n                 feature: Feature,\n                 db: GeoSupport = None,\n                 ):\n    \"\"\"\n    Find all the nodes in the view within a certain distance that\n    are buildings or amenities.\n\n    Args:\n        feature (Feature): The feature to use as the location\n        db (GeoSupport): The database connection to use\n\n    Returns:\n        (list): The results of the conflation\n    \"\"\"\n    # log.debug(f\"queryNodes({feature})\")\n    hits = 0\n    geom = Point((float(feature[\"attrs\"][\"lon\"]), float(feature[\"attrs\"][\"lat\"])))\n    wkt = shape(geom)\n    result = list()\n    ratio = 1\n\n    # for key,value in feature['tags'].items():\n    # print(f\"NODE: {key} = {value}\")\n    # if key not in self.analyze:\n    #     continue\n\n    # Use a Geography data type to get the answer in meters, which\n    # is easier to deal with than degress of the earth.\n    # cleanval = escape(value)\n    # query = f\"SELECT osm_id,tags,version,ST_AsEWKT(geom),ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')),levenshtein(tags->>'{key}', '{cleanval}') FROM nodes_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} AND levenshtein(tags->>'{key}', '{cleanval}') <= {ratio}\"\n    # AND (tags->>'amenity' IS NOT NULL OR tags->>'shop' IS NOT NULL)\"\n    query = f\"{self.select}\" % wkt.wkt\n    query += f\" FROM nodes_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\\'SRID=4326;{wkt.wkt}\\')) < {self.tolerance} AND (tags->>'amenity' IS NOT NULL OR tags->>'building' IS NOT NULL)\"\n    #log.debug(query)\n    # FIXME: this currently only works with a local database,\n    # not underpass yet\n    if db:\n        result = db.queryDB(query)\n    else:\n        result = self.db.queryDB(query)\n    # log.debug(f\"Got {len(result)} results\")\n    if len(result) > 0:\n        hits += 1\n        # break\n    # else:\n    #     log.warning(f\"No results at all for {query}\")\n\n    return result\n
    "},{"location":"api/#geosupportpy","title":"geosupport.py","text":"

    Bases: object

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    None

    Returns:

    Type Description GeoSupport

    An instance of this object

    Source code in osm_merge/geosupport.py
    def __init__(self,\n             dburi: str = None,\n             config: str = None,\n             ):\n    \"\"\"\n    This class conflates data that has been imported into a postgres\n    database using the Underpass raw data schema.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n\n    Returns:\n        (GeoSupport): An instance of this object\n    \"\"\"\n    self.db = None\n    self.dburi = dburi\n    self.config = config\n

    options: show_source: false heading_level: 3

    "},{"location":"api/#osm_merge.geosupport.GeoSupport.importDataset","title":"importDataset async","text":"
    importDataset(filespec)\n

    Import a GeoJson file into a postgres database for conflation.

    Parameters:

    Name Type Description Default filespec str

    The GeoJson file to import

    required

    Returns:

    Type Description bool

    If the import was successful

    Source code in osm_merge/geosupport.py
    async def importDataset(self,\n                 filespec: str,\n                 ) -> bool:\n    \"\"\"\n    Import a GeoJson file into a postgres database for conflation.\n\n    Args:\n        filespec (str): The GeoJson file to import\n\n    Returns:\n        (bool): If the import was successful\n    \"\"\"\n    file = open(filespec, \"r\")\n    data = geojson.load(file)\n\n    # Create the tables\n    sql = \"CREATE EXTENSION postgis;\"\n    result = await self.db.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS public.nodes CASCADE; CREATE TABLE public.nodes (osm_id bigint, geom geometry, tags jsonb);\"\n    result = await self.db.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS public.ways_line CASCADE; CREATE TABLE public.ways_line (osm_id bigint, geom geometry, tags jsonb);\"\n    result = await self.db.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS public.poly CASCADE; CREATE TABLE public.ways_poly (osm_id bigint, geom geometry, tags jsonb);\"\n    result = await self.db.execute(sql)\n\n    # if self.db.is_closed():\n    #     return False\n\n    table = self.dburi.split('/')[1]\n    for entry in data[\"features\"]:\n        keys = \"geom, \"\n        geometry = shape(entry[\"geometry\"])\n        ewkt = geometry.wkt\n        if geometry.geom_type == \"LineString\":\n            table = \"ways_line\"\n        if geometry.geom_type == \"Polygon\":\n            table = \"ways_poly\"\n        if geometry.geom_type == \"Point\":\n            table = \"nodes\"\n        tags = f\"\\'{{\"\n        for key, value in entry[\"properties\"].items():\n            tags += f\"\\\"{key}\\\": \\\"{value}\\\", \"\n        tags = tags[:-2]\n        tags += \"}\\'::jsonb)\"\n        sql = f\"INSERT INTO {table} (geom, tags) VALUES(ST_GeomFromEWKT(\\'SRID=4326;{ewkt}\\'), {tags}\"\n        result = await self.db.pg.execute(sql)\n\n    return False\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.initialize","title":"initialize async","text":"
    initialize(dburi=None, config=None)\n

    When async, we can't initialize the async database connection, so it has to be done as an extra step.

    Parameters:

    Name Type Description Default dburi str

    The database URI

    None config str

    The config file from the osm-rawdata project

    None Source code in osm_merge/geosupport.py
    async def initialize(self,\n                    dburi: str = None,\n                    config: str = None,\n                    ):\n    \"\"\"\n    When async, we can't initialize the async database connection,\n    so it has to be done as an extrat step.\n\n    Args:\n        dburi (str, optional): The database URI\n        config (str, optional): The config file from the osm-rawdata project\n    \"\"\"\n    if dburi:\n        self.db = PostgresClient()\n        await self.db.connect(dburi)\n    elif self.dburi:\n        self.db = PostgresClient()\n        await self.db.connect(self.dburi)\n\n    if config:\n        await self.db.loadConfig(config)\n    elif self.config:\n        await self.db.loadConfig(config)\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.clipDB","title":"clipDB async","text":"
    clipDB(boundary, db=None, view='ways_view')\n

    Clip a database table by a boundary

    Parameters:

    Name Type Description Default boundary Polygon

    The AOI of the project

    required db PostgresClient

    A reference to the existing database connection

    None view str

    The name of the new view

    'ways_view'

    Returns:

    Type Description bool

    If the region was clipped successfully

    Source code in osm_merge/geosupport.py
    async def clipDB(self,\n         boundary: Polygon,\n         db: PostgresClient = None,\n         view: str = \"ways_view\",\n         ):\n    \"\"\"\n    Clip a database table by a boundary\n\n    Args:\n        boundary (Polygon): The AOI of the project\n        db (PostgresClient): A reference to the existing database connection\n        view (str): The name of the new view\n\n    Returns:\n        (bool): If the region was clipped sucessfully\n    \"\"\"\n    remove = list()\n    if not boundary:\n        return False\n\n    ewkt = shape(boundary)\n\n    # Create a new postgres view\n    # FIXME: this should be a temp view in the future, this is to make\n    # debugging easier.\n    sql = f\"DROP VIEW IF EXISTS {view} CASCADE ;CREATE VIEW {view} AS SELECT * FROM ways_poly WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom)\"\n    # log.debug(sql)\n    if db:\n        result = await db.queryDB(sql)\n    elif self.db:\n        result = await self.db.queryDBl(sql)\n    else:\n        return False\n\n    return True\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.queryDB","title":"queryDB async","text":"
    queryDB(sql=None, db=None)\n

    Query a database table

    Parameters:

    Name Type Description Default db PostgreClient

    A reference to the existing database connection

    None sql str

    The SQL query to execute

    None

    Returns:

    Type Description list

    The results of the query

    Source code in osm_merge/geosupport.py
    async def queryDB(self,\n            sql: str = None,\n            db: PostgresClient = None,\n            ) -> list:\n    \"\"\"\n    Query a database table\n\n    Args:\n        db (PostgreClient, optional): A reference to the existing database connection\n        sql (str): The SQL query to execute\n\n    Returns:\n        (list): The results of the query\n    \"\"\"\n    result = list()\n    if not sql:\n        log.error(f\"You need to pass a valid SQL string!\")\n        return result\n\n    if db:\n        result = db.queryLocal(sql)\n    elif self.db:\n        result = self.db.queryLocal(sql)\n\n    return result\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.clipFile","title":"clipFile async","text":"
    clipFile(boundary, data)\n

    Clip a database table by a boundary

    Parameters:

    Name Type Description Default boundary Polygon

    The filespec of the project AOI

    required data FeatureCollection

    The data to clip

    required

    Returns:

    Type Description FeatureCollection

    The data within the boundary

    Source code in osm_merge/geosupport.py
    async def clipFile(self,\n            boundary: Polygon,\n            data: FeatureCollection,\n            ):\n    \"\"\"\n    Clip a database table by a boundary\n\n    Args:\n        boundary (Polygon): The filespec of the project AOI\n        data (FeatureCollection): The data to clip\n\n    Returns:\n        (FeatureCollection): The data within the boundary\n    \"\"\"\n    new = list()\n    if len(self.data) > 0:\n        for feature in self.data[\"features\"]:\n            shapely.from_geojson(feature)\n            if not shapely.contains(ewkt, entry):\n                log.debug(f\"CONTAINS {entry}\")\n                new.append(feature)\n                #  del self.data[self.data['features']]\n\n    return new\n
    "},{"location":"api/#osm_merge.geosupport.GeoSupport.copyTable","title":"copyTable async","text":"
    copyTable(table, remote)\n

    Use DBLINK to copy a table from the external database to a local table so conflating is much faster.

    Parameters:

    Name Type Description Default table str

    The table to copy

    required Source code in osm_merge/geosupport.py
    async def copyTable(self,\n                    table: str,\n                    remote: PostgresClient,\n                    ):\n    \"\"\"\n    Use DBLINK to copy a table from the external\n    database to a local table so conflating is much faster.\n\n    Args:\n        table (str): The table to copy\n    \"\"\"\n    timer = Timer(initial_text=f\"Copying {table}...\",\n                  text=\"copying {table} took {seconds:.0f}s\",\n                  logger=log.debug,\n                )\n    # Get the columns from the remote database table\n    self.columns = await remote.getColumns(table)\n\n    print(f\"SELF: {self.pg.dburi}\")\n    print(f\"REMOTE: {remote.dburi}\")\n\n    # Do we already have a local copy ?\n    sql = f\"SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename  = '{table}'\"\n    result = await self.pg.execute(sql)\n    print(result)\n\n    # cleanup old temporary tables in the current database\n    # drop = [\"DROP TABLE IF EXISTS users_bak\",\n    #         \"DROP TABLE IF EXISTS user_interests\",\n    #         \"DROP TABLE IF EXISTS foo\"]\n    # result = await pg.pg.executemany(drop)\n    sql = f\"DROP TABLE IF EXISTS new_{table} CASCADE\"\n    result = await self.pg.execute(sql)\n    sql = f\"DROP TABLE IF EXISTS {table}_bak CASCADE\"\n    result = await self.pg.execute(sql)\n    timer.start()\n    dbuser = self.pg.dburi[\"dbuser\"]\n    dbpass = self.pg.dburi[\"dbpass\"]\n    sql = f\"CREATE SERVER IF NOT EXISTS pg_rep_db FOREIGN DATA WRAPPER dblink_fdw  OPTIONS (dbname 'tm4');\"\n    data = await self.pg.execute(sql)\n\n    sql = f\"CREATE USER MAPPING IF NOT EXISTS FOR {dbuser} SERVER pg_rep_db OPTIONS ( user '{dbuser}', password '{dbpass}');\"\n    result = await self.pg.execute(sql)\n\n    # Copy table from remote database so JOIN is faster when it's in the\n    # same database\n    #columns = await sel.getColumns(table)\n    log.warning(f\"Copying a remote table is slow, but faster than remote access......\")\n    sql = f\"SELECT * INTO {table} FROM dblink('pg_rep_db','SELECT * FROM {table}') AS {table}({self.columns})\"\n    print(sql)\n    result = await self.pg.execute(sql)\n\n    return True\n
    "},{"location":"calculations/","title":"Conflation Calculations","text":"

    Part of the fun of external datasets, especially ones that have been around a long time like the MVUM data, is the variety of inconsistencies in the data. While OpenStreetMap itself is a bit overly flexible at times, so is external data. And some of the old data has been converted from other formats several times, with bugs getting introduced each time.

    "},{"location":"calculations/#geometries","title":"Geometries","text":"

    OpenStreetMap has relations, which are a collection of references to other features. External data may have LineStrings, MultiLineStrings or a GeometryCollection, all in the same file! For all calculations the MultiLineStrings and GeometryCollections are taken apart, so the calculations are between OSM data and that segment of the external data. Since this may produce multiple values, those need to be evaluated and the most likely one returned.

    "},{"location":"calculations/#distance","title":"Distance","text":"

    A simple distance calculation is performed after transforming the coordinate system from global degrees to meters. The result is compared to a threshold distance, and any feature within that threshold is added to a list of possible matches. After a few features are found within the required distance, matching stops and the next feature to be conflated goes through the same process.

    If the highway is a GeometryCollection or MultiLineString, then it's split into segments, and each one is checked for distance. The closest one is what is returned.
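
    As a rough illustration of the idea, and not the project's actual implementation, the distance between two candidate segments can be approximated by measuring between representative points in meters. The haversine package used elsewhere in this codebase works for this; the 10 meter threshold below is only an example value.

    from haversine import haversine, Unit\nfrom shapely.geometry import LineString\n\ndef within_threshold(osm_line: LineString, ext_line: LineString, threshold: float = 10.0) -> bool:\n    # Compare the centroids of two segments in meters. haversine() expects\n    # (lat, lon) pairs, while shapely stores coordinates as (lon, lat).\n    c1 = osm_line.centroid\n    c2 = ext_line.centroid\n    dist = haversine((c1.y, c1.x), (c2.y, c2.x), unit=Unit.METERS)\n    return dist <= threshold\n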

    "},{"location":"calculations/#slope-and-angle","title":"Slope and Angle","text":"

    Distance often will return features that are close to each other, but often they are spur roads off the more major one. So when two highway segments are found close to each other, the angle between them is calculated. This works well to differentiate between the more major highway and the spur road that splits off from it.

    If the highway is a GeometryCollection or MultiLineString, then it's split into segments, and each one is checked for the angle. The closest one is what is returned.
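
    A minimal sketch of the angle check, assuming both candidates have already been reduced to simple LineStrings; the real code handles more geometry cases. The bearing of each segment is computed from its endpoints, and the difference between the two bearings is the angle used for the comparison.

    import math\nfrom shapely.geometry import LineString\n\ndef bearing(line: LineString) -> float:\n    # Bearing in degrees from the first to the last coordinate of a segment.\n    (x1, y1), (x2, y2) = line.coords[0], line.coords[-1]\n    return math.degrees(math.atan2(x2 - x1, y2 - y1)) % 360\n\ndef angle_between(seg1: LineString, seg2: LineString) -> float:\n    # Smallest angle between the two segments, ignoring direction of travel.\n    diff = abs(bearing(seg1) - bearing(seg2)) % 180\n    return min(diff, 180 - diff)\n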

    "},{"location":"calculations/#tag-checking","title":"Tag Checking","text":"

    Once there is at least one candidate within the parameters of distance and angle, then the tags are checked for matches. The tags we are primarily interested in are the name(s) and reference number(s) of each MVUM road or trail. Some of the existing features in OpenStreetMap may be inaccurate as to the proper name and reference. And of course each feature may have an alt_name or both a ref and a ref:usfs. Due to the wonders of inconsistent data, a fuzzy string comparison is done. This handles most of the basic issues, like capitalization, one or 2 characters difference, etc... Anything above the threshold is considered a probable match, and increments a counter. This value is included in the conflated results, and is often between 1-3.

    The reference numbers between the two datasets are also compared. There is often a reference number in OSM already, but no name. The external dataset has the name, so we want to update OSM with that. In addition, the external datasets often have access information. Seasonal access, private land, or different types of vehicles which can be added to OSM.
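
    A small sketch of the fuzzy comparison, using a fuzz.ratio() implementation (thefuzz here, though any library with the same API works); the 80% threshold is only an example value.

    from thefuzz import fuzz\n\nMATCH_THRESHOLD = 80  # example value, tune per dataset\n\ndef names_match(osm_name: str, ext_name: str) -> bool:\n    # fuzz.ratio() returns 0-100; small typos, abbreviations, and case\n    # differences still score above the threshold.\n    return fuzz.ratio(osm_name.lower(), ext_name.lower()) > MATCH_THRESHOLD\n\n# For example, 'FS 123.1' against 'FR 123.1A' scores in the low 80s,\n# so the pair gets flagged as a probable match for human review.\n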

    "},{"location":"calculations/#tag-merging","title":"Tag Merging","text":"

    The conflation process for merging tags uses the concept of primary and secondary datasets. The primary is considered to have the true value for a highway or trail. For example, if the name in the two datasets doesn't match, the current value in the secondary is renamed to old_name, and the primary's version becomes the name. The same applies to reference numbers.

    Other tags from the primary can also be merged, overriding what is currently in OSM. Once again, the old values are renamed, not deleted. When validating in JOSM, you can see both versions and make a final determination as to what is the correct value. Often it's just spelling differences.

    For all the features in OSM that only have a highway=something as a tag, all the desired tags from the primary dataset are added.

    For some tags like surface and smoothness, the value in OSM is potentially more recent, so those are not updated. For any highway feature lacking those tags, they get added.

    Optionally the various access tags for private, atv, horse, motorcycle, etc... are set in the post conflation dataset if they have a value in the external dataset.
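
    A simplified sketch of the merge rule described above, assuming plain tag dictionaries; the real conflation code also handles the surface, smoothness, and access exceptions in more detail.

    def merge_tags(primary: dict, secondary: dict, protected=('surface', 'smoothness')) -> dict:\n    # The primary dataset wins; the old OSM value is kept as old_<key>\n    # instead of being deleted, so the validator can compare both.\n    merged = dict(secondary)\n    for key, value in primary.items():\n        if key in protected and key in secondary:\n            continue  # keep the possibly more recent OSM value\n        if key in secondary and secondary[key] != value:\n            merged[f'old_{key}'] = secondary[key]\n        merged[key] = value\n    return merged\n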

    "},{"location":"calculations/#debug-tags","title":"Debug Tags","text":"

    Currently a few tags are added to each feature to aid in validating and debugging the conflation process. These should obviously be removed before uploading to OSM. They'll be removed at a future date after more validation. These are:

    • hits - The number of matching tags in a feature
    • ratio - The ratio for name matching if not 100%
    • dist - The distance between features
    • angle - The angle between two features
    • slope - The slope between two features
    "},{"location":"calculations/#issues","title":"Issues","text":"

    Conflation is never 100% accurate due to the wonderful um... \"flexibility\" of the datasets. Minor tweaks to the steering parameters for the distance, angle, and fuzzy string matching can produce slightly different results. I often run the same datasets with different parameters looking for the best results.

    "},{"location":"calculations/#clipping","title":"Clipping","text":"

    Where a feature crosses the task boundary, the calculations have to deal with incomplete features, which is messy. This is particularly a problem when conflating small datasets.

    "},{"location":"conflation/","title":"Conflating External Datasets","text":"

    This project is the merging of several programs for conflating external datasets with OpenStreetMap data developed at HOT. These were originally developed for large scale building imports using MS Footprints in East Africa, and to also work with conflating data collected with OpenDataKit for the Field Mapping Tasking Manager project.

    "},{"location":"conflation/#the-data-files","title":"The Data Files","text":"

    While any name can be used for the OSM database, I usually default to naming the OpenStreetMap database the country name as used in the data file. Other datasets have their own schema, and can be imported with ogr2ogr, or using python to write a custom importer. In that case I name the database after the dataset source. Past versions of this program could conflate between multiple datasets, so it's good to keep things clear.

    "},{"location":"conflation/#overture-data","title":"Overture Data","text":"

    The Overture Foundation (https://www.overturemaps.org) has been recently formed to build a competitor to Google Maps. The plan is to use OpenStreetMap (OSM) data as a baselayer, and layer other datasets on top. The currently available data (July 2023) has 13 different datasets in addition to the OSM data. It is available here. It also includes a snapshot of OSM data from the same time frame. Other than the OSM data and MS Footprints, all the current additional data is primarily US specific, and often contains multiple copies of the same dataset, but from different organizations.

    The osm-rawdata python module has a utility that'll import the Parquet data files into the postgres database schema used by multiple projects at HOT. That schema is designed for data analysis, unlike the standard OSM database schema. There is more detail in these notes I've written about importing Overture Data into postgres.

    "},{"location":"conflation/#duplicate-buildings","title":"Duplicate Buildings","text":"

    This is the primary conflation task. Because of offsets in the satellite imagery used for the original buildings, there is rarely an exact duplicate, only similar. The only time you see an exact duplicate is when the same source data is in multiple other datasets. The orientation may be different even if the same rough size, or it'll be roughly in the same position, but differing sizes. Several checks are made to determine duplicates. First is to check for any intersection of the two polygons. If the two polygons intersect, it's an overlapping building or possibly a duplicate. Any building in the footprint data that is found to be a duplicate is removed from the output data file.
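
    A minimal sketch of that first check, assuming both buildings are already GeoJson features; the real pipeline runs this for every footprint against the nearby OSM buildings.

    from shapely.geometry import shape\n\ndef possible_duplicate(osm_feature: dict, footprint_feature: dict) -> bool:\n    # Any overlap at all flags the footprint as a possible duplicate;\n    # exact equality is rare because of imagery offsets.\n    osm_poly = shape(osm_feature['geometry'])\n    new_poly = shape(footprint_feature['geometry'])\n    return osm_poly.intersects(new_poly)\n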

    "},{"location":"conflation/#overlapping-buildings","title":"Overlapping Buildings","text":"

    It is entirely possible that a new building in the footprints data may overlap with an existing building in OSM. It wouldn't be overlapping in the footprints data. Since this requires human intervention to fix, these buildings are left in the output data, but flagged with a debugging tag of overlapping=yes. There are also many occurrences where the building being imported has a better building geometry than OSM, so the best one should be selected.

    Using the HOT Underpass project, it is possible to scan the building geometries and either delete the one with the bad geometry, or flag it in the result data files for a human to validate the results.

    "},{"location":"conflation/#known-problems","title":"Known Problems","text":"

    There are two main issues with ML/AI derived building footprints. Buildings that are very close together, like the business section in many areas of the world, do not get marked as separate buildings. Instead the entire block of buildings is a single polygon. This will eventually get fixed by drone mapping, where there can be more of a street view of the buildings that you can't get using existing satellite imagery.

    The other problem is that when processing satellite imagery, buildings are recognized by shading differences, so often features are flagged as buildings that don't actually exist. For example, big rocks in the desert, or haystacks in a field, both get marked as a building. Any building in the footprints data that has no other buildings nearby, nor a highway or path of some kind, is flagged with a debugging tag of false=yes. Usually this is easy to determine looking at satellite imagery, since these are often remote buildings. The tags can be searched for when editing the data to visually determine whether it's a real building or not.

    "},{"location":"conflation/#conflating-other-than-buildings","title":"Conflating Other Than Buildings","text":""},{"location":"conflation/#opendatakit","title":"OpenDataKit","text":"

    Data collected in the field using ODK Collect is a specific case. If using data extracts from OpenStreetMap, the data extract has the OSM ID, so it's much simpler to conflate the new tags with either the existing building polygon or POI. For this workflow, any tag in the feature from ODK will overwrite any existing values in the existing feature. This allows for updating the tags & values when ground-truthing. When the OSM XML file is loaded into JOSM, it has the modified attribute set, and the version has been incremented. In JOSM under the File menu, select the Update Modified menu item. This will sync the modified feature with current OSM. At that point all that needs to be done is validate the modified features, and upload to OSM.

    When ODK Collect is used but has no data extract, conflation is more complicated. For this use case, a more brute force algorithm is used. Initially any building polygon or POI within 7 meters is found by querying the database. Most smartphone GPS chipsets, even on high-end phones, are between 4-9m off from your actual location. That value was derived by looking at lots of data, and can be changed when invoking the conflation software in this project. Once nearby buildings are identified, the tags are compared to see if there is a match.

    For example, if collecting data on a restaurant, it may have a new name, but if the nearby building is the only one with an amenity=restaurant (or cafe, pub, etc...) it's considered a probable match. If there are multiple restaurants this doesn't work very well unless the name hasn't changed. If there are multiple possible features, a fixme= tag is added to the POI, and it has to be validated manually later. Every tag in the ODK data has to be compared with the nearby buildings. Often it's the name tag that is used for many amenities.

    If a satellite imagery basemap is used in Collect, conflation is somewhat simpler. If the mapper has selected the center of the building using the basemap, conflation starts by checking for the building polygon in OSM that contains this location. If no building is found, the POI is added to the output file with a fixme=new building tag so the building can be traced by the validator. Any tags from the POI are added to the new building polygon.

    "},{"location":"conflation/#points-of-interest-poi","title":"Points Of Interest (POI)","text":"

    It is common when collecting datasets from non-OSM sources that each feature may only be a single node. This may be a list of schools, businesses, etc... with additional information in each POI that can be added to the OSM building polygon (if it exists). Obviously any imported data must have a license acceptable for importing into OSM.

    Similar to how ODK data is conflated when not using a data extract, the tags & values are compared with any nearby building. Since these imports are often features already in OSM with limited metadata, this adds more details.

    "},{"location":"conflation/#highways","title":"Highways","text":"

    Highways are more complex because OSM uses relations. A relation groups highway segments into a single entity. Sometimes the tags are on the relation, other times on each highway segment. The segments change when the highway condition changes, but the name and reference number don't change. External datasets don't use relations; they are OSM specific.

    "},{"location":"conflation/#mvum-highways","title":"MVUM Highways","text":"

    The USDA publishes a dataset of Motor Vehicle Use Maps (MVUM) highways in the National Forest. Some of this data has already been imported into OSM, although the metadata may be lacking, but the LineString is there. MVUM roads are primarily compacted dirt roads. While some can be driven in a passenger vehicle, most are varying degrees of bad to horrible to impassable. These highways are often used for recreational traffic by off-road vehicles, or for emergency access for a wildland fire or backcountry rescue.

    Another key detail of MVUM highways is each one may have 4 names! There is of course the primary name, for example \"Cedar Lake Road\". But it may also have a local name, common in remote areas. And then there are the reference numbers. A MVUM highway may have two reference numbers, the county designated one, and the USDA one. Luckily OSM supports this. Many of these tags affect both how the highway is displayed, as well as routing for navigation.

    \"name\": \"Platte Lake Road\",\n\"alt_name\": \"Bar-K Ranch Road\",\n\"surface\": \"dirt\",\n\"smoothness\": \"bad\",\n\"highway\": \"track\",\n\"ref\": \"CO 112\",\n\"ref:usfs\": \"FR 521.1A\"\n\"tracktype\": \"grade3\"\n

    A bad highway is something I'd be comfortable driving in a 4x4 high-clearance vehicle. Smoothness values can be a bit misleading, as often what is in OSM may be years out of date. And most MVUM roads get zero maintenance, so they get eroded, pot-holed, or have exposed rocks. And people's perception of road conditions is subjective based on one's experience driving these highways.

    All of this metadata makes conflation interesting. Since existing OSM features were added by more than one person, the tagging may not be consistent. For example, the existing data may have Forest Service Road 123, which should really be ref:usfs=FR 123. And the real highway name Piney Pass Road is in the MVUM dataset. The goal of highway conflation is to merge the new metadata into the existing OSM feature where possible. This then needs to be validated by a human being. There is still much tedious work to process post conflation data before it can be uploaded to OSM.

    But sometimes conflation works well, especially when the LineString in OSM was imported from older versions of the MVUM data. But often highways in OSM were traced off satellite imagery, and may have wildly different geometry.

    If you ignore conflating the tags other than name or ref, the process is somewhat less messy. And tags like surface and smoothness really should be ground-truthed anyway. So I do ignore those for now and stick to validating the name and the two reference numbers which are usually lacking in OSM. That and adding consistency to the data to make it easier to make data extracts.

    To conflate OSM highways with external data, initially each entry in the external dataset does a distance comparison with the existing OSM data. There is an optional threshold to set the distance limit. Since currently this is focused on conflating files without a database, this is computationally intensive, so slow. For data that was imported in the past from MVUM datasets, a distance of zero means it's probably the same segment. The external dataset needs to have the tagging converted to the syntax OSM uses. Tagging can be adjusted using a conversion program, but as conversion is usually a one-off task, it can also be done using JOSM or QGIS. Usually it's deleting most of the tags in the external dataset that aren't appropriate for OSM. Primarily the only tags that are needed are the name and any reference numbers. Since the MVUM data also classifies the type of road surface, this can also be converted. Although as mentioned, it may be drastically out of date, and OSM is more recent and ground-truthed.

    Then there is a comparison of the road names. It's assumed the one from the MVUM dataset is the correct one. And since typos and weird abbreviations may exist in the datasets, fuzzy string matching is performed. This way names like FS 123.1 can match FR 123.1A. In this case the current name value in OSM becomes alt_name, and the MVUM name becomes the official name. This way when validating you can make decisions where there is confusion about what is correct. For an exact name match no other tags are checked, to save a little time.

    Any other processing is going to be MVUM highway specific, so there will be an additional step to work through the reference numbers not supported by this program.

    "},{"location":"conflation/#output-files","title":"Output Files","text":"

    If the data files are huge, it's necessary to conflate with a subset of all the data. For projects using the Tasking Manager or the Field Mapping Tasking Manager you can download the project boundary file and use that. For other projects you can extract administrative boundaries from OpenStreetMap, or use external sources. Usually county administrative boundaries are a good size. These can be extracted from OSM itself, or an external data file of boundaries.

    After conflation, an output file is created with the new buildings that are not duplicates of existing OSM data. This is much smaller than the original data, but still too large for anyone having bandwidth issues. This output file is in GeoJson format, so it can be edited with JOSM or QGIS.

    Since this software is under development, rather than automatically deleting features, it adds tags to the features. Then when editing the data, it's possible to see the flagged data and validate the conflation. It also makes it possible to delete manually the results of the conflation from the output file once satisfied about the validation of the results.

    "},{"location":"conflation/#validating-the-conflation","title":"Validating The Conflation","text":"

    The conflated data file can't be uploaded to OSM until it is validated. While QGIS can be used for this purpose, JOSM is preferred because it does validation checks, and uploads directly to OpenStreetMap. I start by loading the conflation data file, and then enabling the OpenStreetMap imagery for the basemap. Existing buildings in OSM are grey polygons, so it's possible to see existing buildings with the conflated new buildings as a layer on top.

    Once the buildings are loaded, you can then download the OSM data for that view. Then use the SelectDuplicateBuilding script to find any buildings that have been added since the initial data file for conflation was used. Once selected, those can be deleted in a single operation.

    The next step is validating what is left that is considered to be a new building. This is done using satellite imagery. Most commercial satellite imagery available for public use comes from Maxar. But the different providers (Bing, ESRI, Google, etc...) have different update cycles, so I often double check with ESRI imagery.

    If there is drone imagery available from Open Aerial Map, that's also a good source of imagery, but it often doesn't cover a large area.

    "},{"location":"formats/","title":"File Formats","text":"

    This project supports two file formats, GeoJson and OSM XML.

    "},{"location":"formats/#geojson","title":"GeoJson","text":"

    GeoJson is widely supported by many tools, and this project uses it as the internal data structure for consistency. At the top level the file starts with a FeatureCollection, which is just a container for the list of features.
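
    A minimal example of that structure; the feature shown is purely illustrative.

    {\n    \"type\": \"FeatureCollection\",\n    \"features\": [\n        {\n            \"type\": \"Feature\",\n            \"geometry\": {\"type\": \"Point\", \"coordinates\": [-114.00674, 37.6064731]},\n            \"properties\": {\"highway\": \"track\", \"name\": \"Example Road\"}\n        }\n    ]\n}\n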

    "},{"location":"formats/#geometry","title":"Geometry","text":"

    Each GeoJson feature contains a geometry object that has two fields, the coordinates, and the type. Shapely or GDAL can be used to convert between string representations and geometry objects.
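
    For example, shapely can round-trip between a GeoJson geometry dictionary, a geometry object, and a WKT string; this snippet is just an illustration of the library calls, not project code.

    from shapely import from_wkt\nfrom shapely.geometry import shape, mapping\n\ngeom_dict = {'type': 'Point', 'coordinates': [-114.00674, 37.6064731]}\ngeom = shape(geom_dict)           # GeoJson dict -> shapely geometry\nwkt_string = geom.wkt             # geometry -> WKT string\nroundtrip = from_wkt(wkt_string)  # WKT string -> geometry\ngeojson_again = mapping(geom)     # geometry -> GeoJson style dict\n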

    "},{"location":"formats/#properties","title":"Properties","text":"

    The properties object is the array of keyword=value pairs, similar to the tags in OSM. There is no defined schema, so any pair works. For conflation though, standardizing on the OSM schema for tagging pairs is critical to keep things simple.

    \"properties\": {\n    \"ref:usfs\": \"FR 965.2\",\n    \"name\": \"  Road\",\n    \"4wd_only\": \"yes\",\n    \"seasonal\": \"yes\"\n},\n
    "},{"location":"formats/#osm-xml","title":"OSM XML","text":"

    An OSM XML file is read and converted to GeoJson, and then later it can get converted to OSM XML for the output file. In addition to the tags and geometry, each feature also has attributes.

    "},{"location":"formats/#attributes","title":"Attributes","text":"

    The OSM XML format has attributes, which are used to control editing a feature. Since this project wants to generate an OSM XML file for JOSM that allows for tag merging, these attributes are important. In the post conflation data file, the version of the existing OSM feature has been incremented, and the action is set to modify. This enables JOSM to see this as an edited feature so it can be uploaded.

    • id - the OSM ID of the feature
    • version - the current version of the feature
    • action - the action to apply when uploading to OSM
      • create
      • modify
      • delete
    • timestamp - the timestamp of the feature's last change

    With action=modify set, in JOSM you can update modified and sync with current OSM.
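
    As an illustration (the ID, nodes, and tags below are hypothetical), a modified way in the post conflation file carries the incremented version and the action attribute that JOSM looks for:

    <way id=\"12345678\" action=\"modify\" version=\"5\" timestamp=\"2021-06-12T15:42:25Z\">\n  <nd ref=\"1111\"/>\n  <nd ref=\"2222\"/>\n  <tag k=\"highway\" v=\"track\"/>\n  <tag k=\"ref:usfs\" v=\"FR 521.1A\"/>\n</way>\n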

    "},{"location":"formats/#data-types","title":"Data Types","text":"

    There are two data types in the OSM XML files used for conflation. These are nodes and ways.

    "},{"location":"formats/#nodes","title":"Nodes","text":"

    A node is a single coordinate. This is often used as a POI, and will have tags. A node that is referenced in a way won't have any tags, just the coordinates. The version and timestamp get updated if there is a change to the node location.

    <node id=\"83276871\" version=\"3\"\n    timestamp=\"2021-06-12T16:25:43Z\" lat=\"37.6064731\" lon=\"-114.00674\"/>\n
    "},{"location":"formats/#ways","title":"Ways","text":"

    A way can be a linestring, a polygon, or any geometry that includes more than one node. This makes it difficult to do spatial comparisons, so when an OSM XML file is loaded, in addition to the refs, each way is also converted to an actual geometry. All the calculations use the geometry, and the refs are used to construct the OSM XML output file for JOSM. OSM has no concept of a LineString or Polygon; the shape is determined by the tags, for example highway=track, or building=yes.

    <way id=\"10109556\" version=\"4\" timestamp=\"2021-06-12T15:42:25Z\">\n<nd ref=\"83305252\"/>\n<nd ref=\"8118009676\"/>\n<nd ref=\"8118009677\"/>\n<nd ref=\"83277113\"/>\n<nd ref=\"83277114\"/>\n<nd ref=\"83277116\"/>\n<nd ref=\"83277117\"/>\n<tag k=\"highway\" v=\"unclassified\"/>\n<tag k=\"surface\" v=\"dirt\"/>\n

    "},{"location":"formats/#converting-between-formats","title":"Converting Between Formats","text":"

    To support reading and writing OSM XML files, this project has its own code that builds on top of the OsmFile() class in OSM Fieldwork. This parses the OSM XML file into GeoJson format for internal use. All of the attributes in the OSM XML file being read are converted to tags in the GeoJson properties section, and then later converted from the properties back to OSM XML attributes when writing the output file.
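
    A much simplified sketch of that round trip using only the standard library; the project itself relies on the OsmFile() class, so treat this purely as an illustration of OSM attributes becoming GeoJson properties.

    import xml.etree.ElementTree as ET\n\ndef nodes_to_features(filespec: str) -> list:\n    # Read tagged nodes from an OSM XML file and return GeoJson style\n    # features, copying the OSM attributes into the properties.\n    features = list()\n    for node in ET.parse(filespec).getroot().iter('node'):\n        tags = {t.get('k'): t.get('v') for t in node.findall('tag')}\n        if not tags:\n            continue  # untagged nodes only exist to build ways\n        props = {'id': node.get('id'), 'version': node.get('version'), **tags}\n        geom = {'type': 'Point',\n                'coordinates': [float(node.get('lon')), float(node.get('lat'))]}\n        features.append({'type': 'Feature', 'geometry': geom, 'properties': props})\n    return features\n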

    "},{"location":"highways/","title":"Conflating Highway and Trail Data","text":"

    This is focused only on highway and trail data in the US, but should be useful for other countries. In particular, this is focused on the primary goal of improving OpenStreetMap data in remote areas as these are used for emergency response. Most of these roads and trails are in OSM already, some from past imports, some traced off of satellite imagery.

    I did a talk at SOTM-US in Tucson about this project called OSM For Fire Fighting. This conflation software was developed to improve the quality of the remote highway data in OpenStreetMap. This is not an import of new data, only updating existing features with a focus on improved navigation. Importing new features from these datasets uses a different process, so it's better to not mix the two.

    While there are details in the datasets that would be useful, the initial set is the name, the reference number, and the vehicle class appropriate for this highway. Note this can change over time, so if the smoothness tag is in the OSM feature, it's assumed that value is more accurate.

    The primary purpose is to clean up the TIGER import mess, which is often inaccurate. This leads to navigation problems as sometimes what is in OSM is not what the street sign says. Since there are multiple datasets supplied by government agencies with a good license for OSM, we data mine these through conflation to get the best name and reference number.

    Although most of the fields in these datasets aren't useful for OSM, some are, like whether it is a seasonal road, various off-road vehicle access permissions, etc... since this is also useful for navigation. Any tags added or edited will follow the OSM Tagging Guidelines for forest roads.

    "},{"location":"highways/#the-datasets","title":"The Datasets","text":"

    The primary source of these datasets is available from the FSGeodata Clearinghouse, which is maintained by the USDA.

    The Topographical map vector tiles are available from here, which is maintained by the National Forest Service.

    These have been partially imported in some areas in the past, complete with the bugs in the original datasets. One big advantage though is that the geometry in OSM was from the same USDA datasets at some point in the past, so it's relatively easy to match the geometries. Conflation then is mostly working through the name and reference fields between multiple files, which sometimes don't agree on the proper name.

    And OpenStreetMap of course.

    "},{"location":"highways/#processing-the-datasets","title":"Processing The Datasets","text":"

    Since the files are very large with different schemas, a critical part of the conflation process is preparing the data. Some of these files are so large neither QGIS nor JOSM can load them without crashing. I use two primary tools for splitting up the files: ogr2ogr for the GeoJson files, and osmium for the OSM XML files. The OSM XML format is required if you want the conflation process to merge the tags into an existing feature. If conflating with OSM data using the GeoJson format, you need to manually cut & paste the new tags onto the existing feature.

    As you further reduce large datasets to smaller, more manageable pieces, this can generate many files. The top level choice is the largest category. I use National Forest boundaries as they can cross state lines.

    All of the datasets have issues with some features lacking a geometry. These appear to be duplicates of a feature that does have a good geometry. They are also in \"NAD 83 - EPSG:4269\" for the CRS, so we need to convert the CRS and fix the geometries. I use ogr2ogr to convert the GDB files to GeoJson like this:

    ogr2ogr Road_MVUM.geojson S_USA_Road_MVUM.gdb.zip -makevalid -s_srs EPSG:4269 -t_srs EPSG:4326 -sql 'SELECT * FROM Road_MVUM WHERE SHAPE IS NOT NULL'\n\nogr2ogr Trails_MVUM.geojson S_USA_Trail_MVUM.gdb.zip -makevalid -s_srs EPSG:4269 -t_srs EPSG:4326 -sql 'SELECT * FROM Trail_MVUM WHERE SHAPE IS NOT NULL'\n

    This generates a clean GeoJson file. It still has many fields we don't want, so I run a simple conversion program that parses the fields as defined in the original file, and converts the few fields we want for conflation into the OSM equivalent tag/value. For conflation to work really well, all the datasets must use the same schema for the tags and values.
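
    The gist of such a conversion looks something like the sketch below, with made-up MVUM column names since the real schema has many more fields; the actual utilities in this project do considerably more cleanup.

    def mvum_to_osm(properties: dict) -> dict:\n    # Map a few hypothetical MVUM columns onto OSM tags; everything\n    # else from the source record is dropped.\n    tags = {'highway': 'track'}\n    if properties.get('NAME'):\n        tags['name'] = properties['NAME'].title()\n    if properties.get('ID'):\n        tags['ref:usfs'] = f\"FR {properties['ID']}\"\n    if properties.get('SEASONAL') == 'yearlong':\n        tags['seasonal'] = 'no'\n    return tags\n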

    Since the MVUM dataset covers the entire country, I build a directory tree in which the deeper you go, the smaller the datasets are. I have the National Forest Service administrative boundaries unpacked into a top level directory. From there I chop the national dataset into just the data for a forest. This is still a large file, but manageable to edit. Sometimes with rural highway mapping, a large area works better. If there are plans to use the Tasking Manager, the files are still too large, as TM has a 5000 sq km limit.

    Next is generating the task boundaries for each national forest that'll be under the 5000 sq km limit. I used the tm-splitter.py program in this project to take the national forest boundary and break it into squares, clipped properly at the boundary. These task boundary polygons can then be used to create the project in the Tasking Manager, which will further split that into the size you want for mapping.

    Something to be conscious of is these external datasets are also full of obscure bugs. Some of the data I think hasn't been updated since the government discovered digital mapping a few decades ago. The conversion utilities will handle all of these problems in these datasets.

    "},{"location":"highways/#the-openstreetmap-extract","title":"The OpenStreetMap Extract","text":"

    This step is unnecessary if you plan to manually conflate with a GeoJson file, so jump ahead to the next section.

    To conflate against OSM data with the goal of automatically merging the tags into the feature, you have to prepare the dataset. Each feature needs to be validated anyway, and merging tags is more efficient than cut & paste. Since this project is processing data from multiple US states, it exceeds the Overpass data size limits.

    I download the states I want to conflate from Geofabrik, and then use osmium merge to turn it into one big file. I have to do this because most of the national forest cross state lines. You'll get duplicate ID errors if you download these files on different days, so grab all the ones you plan to merge at the same time. Geofabrik updates every 24 hours.

    When dealing with files too large for JOSM or QGIS, osmium is the tool to use. There are also osmfilter and osmconvert, which can be used as well. Ogr2ogr can't be used as it can't write the OSM XML format. To merge multiple files with osmium, do this:

    osmium merge --overwrite -o outdata.osm *.osm.pbf\n

    The next step is to delete everything but highways from the OSM XML file. When conflating highways, we don't care about amenities or waterways.

    The preferred data extraction program for conflation is the osmhighways.py program, which has much more fine-grained control. It also replaces the older fixname.py program and fixes the issues when the name field is actually a reference. It also deletes the extraneous tiger:* tags to reduce bloat.

    You can do something similar with osmium tool, but you wind up with extra features and tags which impacts conflation performance.

    osmium tags-filter --overwrite --remove-tags -o outdata.osm indata.osm w/highway=track,service,unclassified,primary,tertiary,secondary,path,residential,abandoned,footway,motorway,trunk\n

    Finally I clip this large file into separate datasets, one for each national forest.

    osmium extract --overwrite --polygon boundary.geojson -o outdata-roads.osm outdata.osm\n

    Then the real fun starts after the drudgery of getting ready to do conflation.

    "},{"location":"highways/#forest-road-names","title":"Forest Road Names","text":"

    The names and reference numbers in OSM now have a wide variety of incorrect tagging when it comes to names. \"Forest Service Road 123.4A\" is not a name, it is a reference number. Same for \"County Road 43\". The fixname.py utility scans the OSM extract and when it sees incorrect tagging, corrects it to the OSM standard. Since the external datasets already follow the same guidelines, this increases the chance of a good match when conflating, since comparing names is part of the process.
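
    A rough sketch of that kind of cleanup, with hypothetical patterns rather than the actual fixname.py logic:

    import re\n\ndef fix_name(tags: dict) -> dict:\n    # If the name is really a reference number, move it to the right tag.\n    name = tags.get('name', '')\n    usfs = re.match(r'^(?:Forest Service|National Forest) Road (\\S+)$', name, re.I)\n    county = re.match(r'^County Road (\\S+)$', name, re.I)\n    if usfs:\n        tags['ref:usfs'] = f'FR {usfs.group(1)}'\n        del tags['name']\n    elif county:\n        tags['ref'] = f'CR {county.group(1)}'\n        del tags['name']\n    return tags\n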

    "},{"location":"highways/#forest-road-reference-numbers","title":"Forest Road Reference Numbers","text":"

    I'm a huge believer that the name and reference number in OSM should match the street sign, since that's often what is used for navigation. Unfortunately the MVUM data has many highways with a .1 suffix, which some street signs don't display. Also, depending on the age of the paper maps or digital files, older maps lack the .1 suffix, but newer datasets do have the .1 suffix. Since a .1 suffix may be a spur road of questionable quality, it's an important detail, so it is included when updating the reference numbers.

    A minor note, the USGS Topographical basemap for JOSM also sometimes lacks the .1 suffix, so can't be used to validate it.

    "},{"location":"highways/#tiger-tag-deletion","title":"TIGER Tag Deletion","text":"

    Since there is community consensus that the tiger: tags added back in 2008 when the TIGER data was imported are meaningless, they should be deleted as bloat. The fixname.py utility used for correcting the names also deletes these from each feature so you don't have to do it manually.

    "},{"location":"highways/#mvum-roads","title":"MVUM Roads","text":"

    This is all the highways in National Forests. The data contains several fields that would be useful in OSM. This dataset has a grading of 1-5 for the type of vehicle that can drive the road, as well as a field for high clearance vehicles only. This is roughly equivalent to the smoothness tag in OSM. The surface type is also included, which is the same as the OSM surface tag. There are other fields for seasonal access, and seasonal road closures. Roads tagged as needing a high clearance vehicle generate a 4wd_only tag for OSM.
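
    One plausible way to express part of that mapping; the grade assignments below are illustrative and not the project's exact conversion table.

    # Hypothetical mapping from the MVUM 1-5 vehicle class grading to the\n# OSM smoothness scale; the real conversion also looks at other fields.\nGRADE_TO_SMOOTHNESS = {\n    '1': 'very_bad',\n    '2': 'bad',\n    '3': 'intermediate',\n    '4': 'good',\n    '5': 'excellent',\n}\n\ndef grade_tags(grade: str, high_clearance: bool) -> dict:\n    tags = {'smoothness': GRADE_TO_SMOOTHNESS.get(grade, 'bad')}\n    if high_clearance:\n        # High clearance roads also get flagged for 4wd in OSM.\n        tags['4wd_only'] = 'yes'\n    return tags\n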

    The reference numbers often have a typo, an additional number (often 5 or 7) prefixed to the actual number in the original dataset, and were imported this way. Since the reference number needs to match what the map or street sign says, these all need to be fixed. And there are thousands of these...

    The type of vehicle that can be driven on a particular road is a bit subjective, based on one's off-road driving experience. These are typically jeep trails of varying quality, but very useful for back-country rescues or wildland fires.

    "},{"location":"highways/#mvum-trails","title":"MVUM Trails","text":"

    These are from the Motor Vehicle Use Maps (MVUM), which define the class of vehicle appropriate to drive a road. The trails dataset contains additional highways, as some hiking trails are also forest service roads. These are primarily for hiking, but allow vehicle use, primarily specialized off-road vehicles like an ATV or UTV. They suffer from the same bad data as the MVUM roads.

    "},{"location":"highways/#national-forest-trails","title":"National Forest Trails","text":"

    This dataset is hiking trails that don't allow any vehicle usage at all. Many of these trails are in OSM, but lack the trail name and reference number. These also get used for emergency response as well. If there is a name and reference number for the trail, this makes it easier to refer a location to somebody over a radio instead of GPS coordinates.

    "},{"location":"highways/#usgs-topographical-maps","title":"USGS Topographical maps","text":"

    It's possible to download the vector datasets used to produce topographical maps. Each file covers a single 7.5-minute map quad, which is 49 miles or 78.85 km square. There are two variants for each quad, a GDB formatted file and a Shapefile formatted file. The GDB file contains all the data as layers, whereas the Shapefiles have separate files for each feature type. I find the smaller feature-based files easier to deal with. The two primary features we want to extract are Trans_RoadSegment and Trans_TrailSegment. Because of the volume of data, I only have a few states downloaded.

    I then used ogrmerge to produce a single file for each feature type from all the smaller files. This file covers an entire state. This file also has many fields we don't need, so we only want the same set used for all the datasets. The usgs.py program contained in this project is then run to filter the input data file into GeoJson with the OSM tagging schema. The topographical data is especially useful for conflation, since the name and reference number match the paper or GeoPDF maps many people use.

    I found a few problems processing the ShapeFiles due to character encoding issues, and also with converting directly to GeoJson. I do this as a two-step process: first make a unified ShapeFile from all the other ShapeFiles, and then convert it to GeoJson, which seems to work best.

    ogrmerge.py -nln highways -single -o highways.shp VECTOR_*/Shape/Trans_Road*.shp -lco ENCODING=\"\"\nogr2ogr highways.geojson highways.shp\n
    "},{"location":"highways/#conflation","title":"Conflation","text":"

    Once all the datasets are broken into manageable pieces and everything is using the OSM tagging schema, conflation can start. There are two datasets specified: one is the primary, and the other is the secondary. The tag values in the primary will override the values in the secondary file. To be paranoid about the details, when a tag value is overwritten by the primary data source, the existing value is kept with an old_ prefix, i.e. name becomes old_name, and then name is updated to the new value. Sometimes the difference in the names is just due to abbreviations, spelling mistakes, etc., so when editing, the old_name can be deleted.

    When conflating multiple datasets, those need to be conflated against each other before conflating with OSM. Since the topographical dataset is what matches a paper map, or GeoPDF, I consider that the primary dataset. The MVUM and trail data are particularly full of mistakes. Sometimes one dataset has a name, and the other doesn't, so conflation here produces that value.

    There are also many, many highways in these areas that in OSM only have highway=something. These are easy to conflate as you are only adding new tags. While TIGER has many highway=residential ways that should really be highway=unclassified or highway=track, it is entirely possible some really are residential roads; there are a lot of nice cabins way out in most national forests. But this is the type of thing you'd really need to ground-truth, and luckily it doesn't affect navigation when you are out in a network of unmaintained dirt roads.

    The conflation algorithm is relatively simple at the high level, just find all other highways within a short distance, and then check the slope to eliminate a side road that may be touching. At the lower level, there is a lot of support for dealing with the bugs in the external datasets.
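
    As a rough sketch of the high-level search, assuming GeoJson-style features and using shapely for the distance test (the real conflator.py does considerably more than this):

    from shapely.geometry import shape\n\ndef candidates(ext_feature, osm_features, threshold=0.002):\n    # Find all OSM highways within the threshold of the external feature.\n    # The threshold is in degrees rather than meters, so it is only approximate.\n    geom = shape(ext_feature['geometry'])\n    return [f for f in osm_features if shape(f['geometry']).distance(geom) <= threshold]\n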

    "},{"location":"highways/#editing-in-josm","title":"Editing in JOSM","text":"

    Unfortunately manually validating the data is very time consuming, but it's important to get it right. I use the TODO plugin and also a data filter so I just select highways. With the TODO plugin, I add the selected features, ideally the entire task. Then I just go through all the features one at a time. When the OSM XML dataset is loaded, nothing will appear in JOSM. This is because the OSM XML file produced by conflation has the refs for each way, but lacks the nodes. All it takes is selecting the update modified menu item under the File menu; all the nodes get downloaded and the highways appear.

    I often have the original datasets loaded as layers, since sometimes it's useful to refer back to when you find issues with the conflation. Much of the existing data in OSM has many unused tags added during the TIGER import. These also get deleted as meaningless bloat. Some were imported with all the tags from the original dataset which also get deleted. This is life as a data janitor...

    Once you've validated all the features in the task, it can be run through the JOSM validator, and if all is good, uploaded to OSM. Often the JOSM validator finds many existing issues. I fix anything that is an error, and mostly ignore all the warnings as that's a whole other project.

    If you are editing with the OSM XML file produced by conflation, when the file is opened, there will be some conflicts. This is usually due to things like the incorrect forest road name getting deleted, since now it's a proper ref:usfs reference number. And the tiger tags are gone as well if the fixnames.py utility is used.

    To fix the conflicts, I just select them all, and click on resolve to my version. Since all the new tags and old tags are preserved, you can edit them directly in the tags window in JOSM. Then I load all the ways into the TODO plugin. You can also use the conflict dialog box to edit the merged tags, but I find the other way more efficient.

    Using the plugin to validate a feature, all I have to do is click on the entry. Sometimes there will be issues that need to be manually fixed. If conflation has changed the name, the old one is still in the feature, so a manual comparison can be done. Often validating a feature is just deleting a few tags. But this is the important detail for machine editing: somebody (not AI) must manually validate each changed feature. This is why the efficiency of mapping is important if you want to update a large area, like an entire national forest.

    Sometimes there are weird typos that have slipped through the process. This is where the time goes, since you have to manually edit the values. But many times for these remote highways you can just mark it as done, and go on to the next one. Many of these highways in OSM have no tags beyond highway=track, so no conflicts. This lets you validate a large number of features relatively quickly without sacrificing quality.

    "},{"location":"highways/#editing-osm-xml","title":"Editing OSM XML","text":"

    The conflation process produces an output file in OSM XML format. This file has an incremented version number and action=modify added to the attributes of each feature. When loaded into JOSM, no data is initially visible. Go to the File menu and execute update modified. This will download all the nodes for the ways, and all the highways will become visible. Highways that already have multiple tags in OSM will become a conflict. These can be resolved easily in JOSM using the conflict dialog box. No geometries have changed, just tags, so you have to manually select the tags to be merged. Features without tags beyond highway=something merge automatically, which makes validating these features quick and easy. Note that every feature needs to be validated individually.

    "},{"location":"highways/#editing-geojson","title":"Editing GeoJson","text":"

    While JOSM can load and edit GeoJson data, since it is not a native OSM format it can't be automatically merged. Instead, load the GeoJson file and then create a new OSM layer. I select all the highways in the task, and load them into the TODO plugin. Sometimes there are so few highways, I don't use the TODO plugin. I then cut the tags and values for a feature from the GeoJson file, switch to the OSM layer, and paste the tags into the feature.

    "},{"location":"highways/#validating","title":"Validating","text":"

    Here's an example of the results of a 3 way conflation. This was between the MVUM data, the topographical data, and OSM data.

    • highway=unclassified
    • lanes=2
    • name=Whisky Park Road
    • operator=US Forest Service
    • ref:usfs=FR 503
    • smoothness=good
    • surface=gravel

    Note that the name is spelled wrong.

    "},{"location":"highways/#splitting-highways","title":"Splitting Highways","text":"

    In national forest lands, the reference number changes at every major intersection. Side roads that branch off have an additional modifier added. For example, the main road may be called ref:usfs=\"FR 505\", with a change to ref:usfs=\"FR 505.1\" when it crosses a state line. Spur roads (often to campsites) get a letter attached, so the spur road is ref:usfs=\"FR 505.1A\". Understanding how the reference numbers are assigned makes it easy to transmit your location over a radio or phone, and have somebody looking at a map find that location. Much easier than using GPS coordinates.
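
    A tiny sketch, purely for illustration, of how such a reference number breaks down into main road, branch, and spur:

    import re\n\ndef split_ref(ref: str):\n    # 'FR 505' -> ('505', None, None), 'FR 505.1' -> ('505', '1', None), 'FR 505.1A' -> ('505', '1', 'A')\n    match = re.match(r'^FR ([0-9]+)(?:[.]([0-9]+)([A-Z])?)?$', ref)\n    return match.groups() if match else None\n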

    For the highways that were traced off of satellite imagery, there is often a problem with forks in the road. Often tree cover or poor resolution imagery makes it hard to see the highway. And a lot of the highways go through an area with an entire network of other dirt roads, so the reference number may just group a bunch of highway segments. Often the most visible highway branch in the imagery at a fork is not the actual road. In this case the highway has to be split at the fork, the new segment tagged with its actual value, and the real highway segment tagged correctly. This is critical if you want navigation to work.

    "},{"location":"highways/#ground-truthing","title":"Ground-truthing","text":"

    If you really want detailed and accurate maps, ground-truthing is an important part of the process. Road conditions change, especially the unmaintained dirt roads. Years of erosion, off-road vehicle abuse, etc... all change. For this reason the surface, smoothness and tracktype tags are not merged, as what is in the external datasets is likely out of date. Also sometimes parts of a dirt road get paved, or access is closed off completely.

    This is a good excuse to go there for some hiking and camping fun. You can load data into StreetComplete when online, and then use that in the field, since there will likely be no cell phone connection. Depending on the software used to collect the data, it may need conflation before uploading, for example OpenDataKit data. Some detail on that process is in this Highway Mapping blog post about a field mapping trip.

    "},{"location":"mvum/","title":"MVUM Conversion","text":"

    The MVUM dataset is all of the motor vehicle roads in a national forest. These are primarily remote dirt roads, often just a jeep track. They are heavily used for back country access for wildland fires and rescues. Much of this data has been imported in the past, complete with all the bugs in the dataset.

    This utility program normalizes the data, correcting or flagging bugs as an aid for better conflation.

    The original dataset can be found here on the USDA FSGeodata Clearinghouse website.

    "},{"location":"mvum/#dataset-bugs","title":"Dataset Bugs","text":""},{"location":"mvum/#bad-reference-numbers","title":"Bad Reference Numbers","text":"

    In some areas the MVUM data has had a 5 or a 7 prefixed to the actual reference number. These are all usually in the same area, so I assume whoever was doing data entry had a sticky keyboard, or it got messed up when converting from paper maps to digital; who really knows. But it makes that tag worthless.

    Another common problem with the reference numbers is that in some areas the major maintained roads have a .1 appended. A minor part of the number should always have a letter appended. So \"FR 432.1\" is actually \"FR 432\", whereas \"FR 432.1A\" is correct. This was confirmed by reviewing multiple other map sources, as the paper and PDF versions of the dataset have the correct reference without the .1 appended. Obviously this dataset is not used to produce the maps you can get from the Forest Service.
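
    Since it isn't always safe to strip the suffix automatically, a hypothetical helper like this could simply flag the suspicious references for manual review:

    import re\n\ndef suspicious_ref(ref: str) -> bool:\n    # 'FR 432.1' is suspect (no letter after the minor part), 'FR 432.1A' is fine\n    return re.match(r'^FR [0-9]+[.][0-9]+$', ref) is not None\n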

    I do notice that in the original MVUM datasets, whoever emp=3.48 is seems to be the main person with data entry issues. And this seems to apply across the entire western US. Not all highways mapped by 3.48 have this problem, but many do. Chances are other emps have similar issues. I'll keep track, and maybe add the employee ID as a temporary debugging tag in the conflation results. Cleaning up all the wrong reference numbers will make OSM the best map for road and trail navigation on public lands.

    "},{"location":"mvum/#dixie-national-forest","title":"Dixie National Forest","text":"

    In the current MVUM dataset for this national forest, for some reason a 30 has been prefixed to all the IDs, making the reference numbers wrong.

    "},{"location":"mvum/#manti-lasal-national-forest","title":"Manti-LaSal National Forest","text":"

    In the current MVUM dataset for this national forest, for some reason a 5 or 7 has been prefixed to many of the IDs, making the reference numbers wrong.

    "},{"location":"mvum/#fishlake-national-forest","title":"Fishlake National Forest","text":"

    In the current MVUM dataset for this national forest, for some reason a 4 or 40 has been prefixed to some of the IDs, making the reference numbers wrong.

    "},{"location":"mvum/#mount-hood-national-forest","title":"Mount Hood National Forest","text":"

    For some reason, some of the reference numbers have a 000 appended, making the reference numbers wrong. This applies to paved roads, not just remote jeep tracks.

    "},{"location":"mvum/#doesnt-match-the-sign","title":"Doesn't Match The Sign","text":"

    There is an issue with the MVUM reference numbers not matching the sign. This is luckily limited to whether there is a .1 appended to the reference number without a letter at the end. Usually a reference without a .1 is a primary road, and the .1 gets appended for a major branch off that road. While out ground-truthing MVUM roads recently I saw multiple examples where the reference number in the MVUM data (and often in OSM) has the .1, so I use that value regardless of what the sign says. It's still quite obvious what the reference number is, since the only difference is the .1 suffix.

    This gets more interesting when you compare with other data sources, i.e. paper and digital maps. Older data sources seem to drop the .1, whereas the same road in a newer version of the dataset has the .1 suffix. So I figure anyone navigating remote roads that checks their other maps would figure out which way to go. So anyway, when way out on remote very_bad or horrible MVUM roads, you should have multiple maps if you don't want to get confused.

    "},{"location":"mvum/#missing-geometry","title":"Missing Geometry","text":"

    There are features with no geometry at all, but the tags all match an existing feature that does have a geometry. These appear to be accidental duplicates, so they get removed.

    "},{"location":"mvum/#dropped-fields","title":"Dropped Fields","text":"

    These fields are dropped as they aren't useful for OpenStreetMap.

    • TE_CN
    • BMP
    • EMP
    • SYMBOL_CODE
    • SEG_LENGTH
    • JURISDICTION
    • SYSTEM
    • ROUTE_STATUS
    • OBJECTIVE_MAINT_LEVEL
    • FUNCTIONAL_CLASS
    • LANES
    • COUNTY
    • CONGRESSIONAL_DISTRICT
    • ADMIN_ORG
    • SERVICE_LIFE
    • LEVEL_OF_SERVICE
    • PFSR_CLASSIFICATION
    • MANAGING_ORG
    • LOC_ERROR
    • GIS_MILES
    • SECURITY_ID
    • OPENFORUSETO
    • IVM_SYMBOL
    • GLOBALID
    • SHAPE_Length
    "},{"location":"mvum/#preserved-fields","title":"Preserved Fields","text":"

    The field names are a bit truncated in the dataset, but these are the fields that are kept and converted to OSM tags:

    • ID is id
    • NAME is name
    • OPER_MAINT_LEVEL is smoothness
    • SYMBOL_NAME is smoothness
    • SURFACE_TYPE is surface
    • SEASONAL is seasonal
    • PRIMARY_MAINTAINER is operator
    "},{"location":"mvum/#abbreviations","title":"Abbreviations","text":"

    There are multiple and somewhat inconsistent abbreviations in the MVUM dataset highway names. OpenStreetMap should use the full value. These were all found by the conflation software when trying to match names between two features. Since much of the MVUM data is of varying quality, there are probably a few not captured here that will have to be fixed when editing the data. This however improves the conflation results and limits manual editing. A minimal sketch of the substitution follows the list below.

    • \" Cr \" is \" Creek \"
    • \" Cr. \" is \" Creek \"
    • \" Crk \" is \" Creek \"
    • \" Cg \" is \" Campground \"
    • \" Rd. \" is \" Road \"
    • \" Mt \" is \" Mountain \"
    • \" Mtn \" is \" Mountain \"
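
    A minimal sketch of the substitution using the table above (the exact list in mvum.py may differ):

    ABBREVIATIONS = {' Cr ': ' Creek ', ' Cr. ': ' Creek ', ' Crk ': ' Creek ', ' Cg ': ' Campground ', ' Rd. ': ' Road ', ' Mt ': ' Mountain ', ' Mtn ': ' Mountain '}\n\ndef expand_name(name: str) -> str:\n    # Pad the name so abbreviations at the start or end of the string still match\n    padded = ' ' + name + ' '\n    for abbrev, full in ABBREVIATIONS.items():\n        padded = padded.replace(abbrev, full)\n    return padded.strip()\n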
    "},{"location":"mvum/#tag-values","title":"Tag values","text":""},{"location":"mvum/#oper_maint_level","title":"OPER_MAINT_LEVEL","text":"

    This field is used to determine the smoothness of the highway. Conveniently, the Forest Service publishes official Road Maintenance Guidelines for this field, complete with multiple pictures and detailed technical information on each level. To correlate these values, I did some ground-truthing on MVUM roads, and I'd agree that level 2 is definitely high clearance vehicle only, and that it fits the definition here for very_bad, although some sections were more like horrible: deeply rutted, big rocks, lots of erosion. A minimal sketch of this mapping follows the list below.

    • 5 -HIGH DEGREE OF USER COMFORT: Assigned to roads that provide a high degree of user comfort and convenience. This becomes smoothness=excellent.

    • 4 -MODERATE DEGREE OF USER COMFORT: Assigned to roads that provide a moderate degree of user comfort and convenience at moderate travel speeds. This becomes smoothness=bad.

    • 3 -SUITABLE FOR PASSENGER CARS: Assigned to roads open for and maintained for travel by a prudent driver in a standard passenger car. This becomes smoothness=good.

    • 2 -HIGH CLEARANCE VEHICLES: Assigned to roads open for use by high clearance vehicles. This adds 4wd_only=yes and becomes smoothness=very_bad.

    • 1 -BASIC CUSTODIAL CARE (CLOSED): Assigned to roads that have been placed in storage (> one year) between intermittent uses. Basic custodial maintenance is performed. Road is closed to vehicular traffic. This becomes access=no
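
    A sketch of that mapping as a lookup table; the exact key strings found in the MVUM dataset are an assumption here:

    MAINT_LEVEL = {\n    '5 - HIGH DEGREE OF USER COMFORT': {'smoothness': 'excellent'},\n    '4 - MODERATE DEGREE OF USER COMFORT': {'smoothness': 'bad'},\n    '3 - SUITABLE FOR PASSENGER CARS': {'smoothness': 'good'},\n    '2 - HIGH CLEARANCE VEHICLES': {'smoothness': 'very_bad', '4wd_only': 'yes'},\n    '1 - BASIC CUSTODIAL CARE (CLOSED)': {'access': 'no'},\n}\n\ndef maint_level_tags(value: str) -> dict:\n    # Look up the OSM tags for an OPER_MAINT_LEVEL value, empty if unknown\n    return MAINT_LEVEL.get(value, {})\n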

    "},{"location":"mvum/#symbol_name","title":"SYMBOL_NAME","text":"

    Sometimes OPER_MAINT_LEVEL doesn't have a value, so this is used as a backup. These values are not used to update the existing values in OSM, they are only used for planning ground-truthing trips.

    • Gravel Road, Suitable for Passenger Car becomes surface=gravel
    • Dirt Road, Suitable for Passenger Car becomes surface=dirt
    • Road, Not Maintained for Passenger Car becomes smoothness=very_bad
    • Paved Road becomes surface=paved
    "},{"location":"mvum/#surface_type","title":"SURFACE_TYPE","text":"

    This is another field that is converted, but not used when editing the existing OSM feature. The surface can only really be determined by ground-truthing, but it is converted as another aid for route planning.

    • AGG -CRUSHED AGGREGATE OR GRAVEL becomes surface=gravel
    • AC -ASPHALT becomes surface=asphalt
    • IMP -IMPROVED NATIVE MATERIAL becomes surface=compacted
    • CSOIL -COMPACTED SOIL becomes surface=compacted
    • NAT -NATIVE MATERIAL becomes surface=dirt
    • P - PAVED becomes surface=paved
    "},{"location":"mvum/#name","title":"Name","text":"

    The name is always in all capital letters, so it is converted to standard capitalization: the first letter of every word is upper case, the rest lower case.
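
    A one-line sketch of that conversion; real names with apostrophes or acronyms may need more care:

    def fix_capitalization(name: str) -> str:\n    # 'WHISKY PARK ROAD' -> 'Whisky Park Road'\n    return name.title()\n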

    "},{"location":"mvum/#options","title":"Options","text":"
    -h, --help            show this help message and exit\n-v, --verbose         verbose output\n-i INFILE, --infile INFILE MVUM data file\n-c, --convert         Convert MVUM feature to OSM feature\n-o OUTFILE, --outfile OUTFILE Output GeoJson file\n
    "},{"location":"odkconflation/","title":"Conflating OpenDataKit with OpenStreetMap","text":"

    Typically conflation is done when doing data imports, but not always. Data collected in the field can be considered an import. Conflating buildings or POIs from external data is relatively easy, as it's already been cleaned up and validated. When you are doing field mapping, you have to clean up and validate the data during conflation. This is a time consuming process even with good conflation software.

    I've worked with multiple conflation programs over the years: Hootenanny, OpenJump (later forked into RoadMatcher), etc., which are now dead projects. Conflation is a hard technical challenge, and the results are often poor and unsatisfying. For smaller datasets it's often easier to do manual conflation using JOSM or QGIS. This project tries to simplify the problem by focusing on OpenStreetMap data.

    "},{"location":"odkconflation/#smartphone-data-collection","title":"Smartphone Data Collection","text":"

    While commercial organizations may use expensive GPS devices, most of us who do data collection as volunteers or for an NGO use our smartphones. There is a variety of smartphone apps for data collection that fall into two categories. The first category is apps like Vespucci, StreetComplete, and Organic Maps, which upload directly to OpenStreetMap. These are great for the casual mapper who only adds data occasionally and is limited to a POI. For example, a casual mapper may want to add the restaurant they are currently eating in when they notice it's not in OpenStreetMap. In addition, they probably have a cell phone connection, so the data gets added right away.

    The other category is apps like ODK Collect, QField, and ArcGIS Field Maps, which are oriented to larger scale mapping projects, often offline without any cellular connection. These collect a lot of data that then needs to get processed later. And conflation is part of this process.

    All of these smartphone based data collection apps suffer from poor GPS location accuracy. Modern smartphones (2024) are often 5-9 meters off the actual location, sometimes worse. In addition when field data collecting, you can't always record the actual location you want, you can only record where you are standing.

    You can improve the location data somewhat if you have a good quality basemap, for example when you see a building within a courthouse wall while you are standing in the street. If you have a basemap, typically satellite imagery, you can touch the location on the basemap and use that instead of where you are standing. Then later when conflating, there is a much better chance the process will be less painful.

    "},{"location":"odkconflation/#opendatakit","title":"OpenDataKit","text":"

    OpenDataKit is a format for data import forms used to collect custom data. The source file is a spreadsheet, called an XLSForm. This gets used by the mobile app for the question and answer process defined by the XLSForm. There are multiple apps and projects using XLSForms, so it's well supported and maintained.

    The XLS source file syntax is a bit weird at first, being a spreadsheet, so the osm-fieldwork project contains tested XLSForm templates for a variety of mapping project goals. These can be used to create efficient XForms that are easy to convert to OSM. The primary task when manually converting ODK collected data into OSM format is converting the tags. If the XLSForm is created with a focus towards OSM, this can be a much simpler process. This is detailed more in this document. Simply stated, what is in the name column in the XLSForm becomes the name of the tag in OSM, and the response from the choices sheet becomes the value.

    "},{"location":"odkconflation/#odk-collect-central","title":"ODK Collect & Central","text":"

    ODK Collect is a mobile app for data collection using XLSForms. Its server side is ODK Central, which replaces the older ODK Aggregate. ODK Central manages the XLSForms downloaded to your phone, as well as the submissions uploaded from your phone when back online.

    A related project for processing ODK data and working remotely with Central is osm-fieldwork. This Python project handles conversion of the various data files from Collect or Central, into OSM XML and GeoJson for future processing via editing or conflation. This is heavily used in the FMTM backend.

    "},{"location":"odkconflation/#field-data-collection","title":"Field Data Collection","text":"

    Collecting data in the field is the best way to add data to OpenStreetMap. Whether done by casual mappers adding POIs or by more dedicated mappers, capturing what is on the ground at that moment is the key to keeping OSM fresh and updated. When it comes to improving the metadata for buildings, many have been imported with building=yes from remote mapping using the HOT Tasking Manager to trace buildings from satellite imagery.

    But ground-truthing what kind of building it is improves the map. It may be a medical clinic, restaurant, residence, etc.; who knows until somebody stands in front of the building to collect more information about it. This may be identifying it as a clinic or residence, adding the building material, what the roof is made of, whether its power is non-existent, or whether there are solar panels or a generator. Some humanitarian mapping collects data on public toilets and community water sources for future improvements.

    Knowing there is a building on the map is useful, but better yet is what is the building used for ? What is it made of ? Does it have AC or DC power ? Water available ? All of these details improve the map to make it more useful to others.

    "},{"location":"odkconflation/#field-mapping-camping-manager","title":"Field Mapping Camping Manager","text":"

    The Field Mapping Tasking Manager (FMTM) is a project to organize large scale data collection using ODK Collect and ODK Central as the primary tools. It uses the osm-fieldwork project for much of the backend processing of the ODK data, but is designed for large scale field mapping involving many people. One of the final steps in processing ODK data to import into OSM is conflating it with existing data. This can be done manually of course, but with a large number of data submissions this becomes tedious and time consuming. FMTM aggregates all the data for an entire project, and may have thousands of submissions. This is where conflation is critical.

    "},{"location":"odkconflation/#the-algorythm","title":"The Algorythm","text":"

    Currently conflation is focused on ODK with OSM. This uses the conflator.py program, which can conflate between the ODK data and an OSM data extract. There are other conflation programs in this project for other external datasets, but they use a postgres database instead of two files.

    "},{"location":"odkconflation/#the-conflator-class","title":"The Conflator() Class","text":"

    This is the primary interface for conflating files. It has two primary endpoints. The top level endpoint is Conflator.conflateFiles(), which is used when the conflator program is run standalone. It opens the two disk files, parses the various formats, and generates a data structure used for conflation. This class uses the Parsers() class from osm-fieldwork, which can parse the JSON or CSV files downloaded from ODK Central, or the ODK XML \"instance\" files when working offline. OSM XML or GeoJson files are also supported. Each entry in the files is turned into a list of python dicts to make it easier to compare the data.

    Once the two files are read, the Conflator.conflateFeatures() endpoint takes the two lists of data and does the actual conflation. There is an additional parameter passed to this endpoint that is the threshold distance. This is used to find all features in the OSM data extract within that distance. Note that this is a unit of the earth's circumference, not meters, so distance calculations are a bit fuzzy.

    This is a brute force conflation algorithm, not fast, but it tries to be complete. It consists of two loops. The top level loops through the ODK data. For each ODK data entry, it finds all the OSM features within that threshold distance. The inner loop then uses the closest feature and compares the tags. This is where things get interesting... If there is a name tag in the ODK data, it is string compared with the name in the closest OSM feature. Fuzzy string matching is used to handle minor spelling differences. Sometimes the mis-spelling is in the OSM data, but often when entering names of features on your smartphone, mis-typing occurs. If there is a 100% match in the name tags, then chances are the feature exists in OSM already.
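
    A sketch of the name comparison; the fuzzy matching library and threshold shown here are assumptions, not necessarily what conflator.py actually uses:

    from thefuzz import fuzz\n\ndef names_match(odk_name: str, osm_name: str, threshold: int = 85) -> bool:\n    # Fuzzy string comparison tolerates minor spelling and word order differences\n    return fuzz.token_sort_ratio(odk_name.lower(), osm_name.lower()) >= threshold\n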

    If there is no name tag in the ODK data, then the other tags are compared to try to find a possible duplicate feature. For example, a public toilet at a trailhead has no name, but if both ODK and OSM have amenity=toilets, then it's very likely a duplicate. If no tags match, then the ODK data is probably a new feature.

    Any time a possible duplicate is found, it is not automatically merged. Instead a fixme tag is added to the feature in the output file with a statement that it is potentially a duplicate. When the output file is loaded into JOSM, you can search for this tag to manually decide if it is a duplicate.
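
    A minimal sketch of that decision, assuming each feature carries a plain dict of tags:

    def flag_possible_duplicate(odk_tags: dict, osm_tags: dict) -> dict:\n    # Any tag/value pair shared with the nearby OSM feature suggests a duplicate\n    shared = [key for key, value in odk_tags.items() if osm_tags.get(key) == value]\n    if shared:\n        odk_tags['fixme'] = 'Probably a duplicate of an existing OSM feature, please verify'\n    return odk_tags\n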

    "},{"location":"odkconflation/#xlsform-design","title":"XLSForm Design","text":"

    A key detail for improving conflation is a carefully created XLSForm. There is much more detailed information on XLSForm design elsewhere, but briefly: whatever is in the name column in the survey sheet becomes the name of the tag, and whatever is in the name column in the choices sheet becomes the value. If you want relatively smooth conflation, make sure your XLSForm uses the OSM tagging schema.

    If you don't follow OSM tagging, then conflation will assume all your ODK data is new features, and you'll have to manually conflate the results using JOSM. That's OK for small datasets, but quickly becomes very tedious for the larger datasets that FMTM collects.

    "},{"location":"odkconflation/#the-output-file","title":"The Output File","text":"

    The output file must be in OSM XML to enable updating the ways. If the OSM data is a POI, viewing it in JOSM is easy. If the OSM data is a polygon, when loaded into JOSM, they won't appear at first. Since the OSM way created by conflation has preserved the refs used by OSM XML to reference the nodes, doing update modified in JOSM then pulls down the nodes and all the polygons will appear.

    "},{"location":"odkconflation/#conflicts","title":"Conflicts","text":"

    There are some interesting issues to fix post conflation. ODK data is usually a single POI, whereas in OSM it may be a polygon. Sometimes though the POI is already in OSM. Remote mapping or building footprint imports often have a polygon with a single building=yes tag, while the POI collected in ODK has more data, for example that the building is a restaurant serving pizza and is made of brick.

    In OSM sometimes there is a POI for an amenity, as well as a building polygon that were added at different times by different people. The key detail for conflation is do any of the tags and values from the new data match existing data ?

    FMTM downloads a data extract from OSM using osm-rawdata, and then filters the data extract based on what is on the choices sheet of the XLSForm. Otherwise Collect won't launch. Because this data extract does not contain all the tags that are in OSM, it creates conflicts. This problem is FMTM specific, and can be improved by making a more complete data extract from OSM.

    When the only tag in the OSM data is building=, any tags from ODK are merged with the building polygon when possible. If the OSM feature has other tags, JOSM will flag this as a conflict. Then you have to manually merge the tags in JOSM.

    "},{"location":"osm-merge/","title":"Conflator Program","text":"

    osm-merge is a program that conflates building footprint data with OpenStreetMap data to remove duplicates. The result of the conflation process is buildings that only exist in the footprints data file.

    This program can process data from either a postgres database, or data files in geojson, shapefile format. One of the core concepts is using a data file of polygons to filter the larger datasets, since a database may contain multiple countries.

    The process of setting up for large scale conflation is in this document.

    "},{"location":"osm-merge/#command-line-options","title":"Command Line Options","text":""},{"location":"osm-merge/#common-options","title":"Common Options","text":"

    These are the most commonly used options.

    --help(-h)       Get command line options\n--verbose(-v)    Enable verbose output\n--boundary(-b)   Specify a multipolygon for boundaries, one file for each polygon\n--project(-p)    Tasking Manager project ID to get boundaries from database\n--osmdata(-x)    OSM XML/PBF or OSM database to get boundaries (prefix with pg: if database)\n--outdir(-o)     Output file prefix for output files (default \"/tmp/tmproject-\")\n--footprints(-f) File or building footprints Database URL (prefix with pg: if database)\n--dbhost(-d)     Database host, defaults to \"localhost\"\n--dbuser(-u)     Database user, defaults to current user\n--dbpass(-w)     Database password, defaults to no password needed\n
    "},{"location":"osm-merge/#tasking-manager-options","title":"Tasking Manager Options","text":"

    These options are used to dynamically extract a project boundary from a Tasking Manager database. A more common usage is to use the splitter.py program to download the project boundary from the Tasking Manager itself.

    --splittasks     When using the Tasking Manager database, split into tasks\n--schema         OSM database schema (pgsnapshot, ogr2ogr, osm2pgsql) defaults to \"pgsnapshot\"\n--tmdata(-t)     Tasking Manager database to get boundaries if no boundary file prefix with pg: for database usage, http for REST API\n
    "},{"location":"osm-merge/#osm-options","title":"OSM Options","text":"

    When extracting administrative boundaries from an OpenStreetMap database, the default admin level is 4, which is commonly used for county boundaries. This lets the user select what level of administrative boundaries they want.

    --admin(-a)      When querying the OSM database, this is the admin_level, (defaults to 4)\n
    "},{"location":"osm-merge/#examples","title":"Examples","text":"

    PATH/conflator.py -v -x 12057-osm.geojson -f 12057-ms.geojson -o 12057

    This takes two disk files, which have already been filtered to only contain data for the area to conflate.

    PATH/conflator.py -v -x pg:kenya -b 12007-project.geojson -f 12057-ms.geojson -o 12057

    This uses a database that contains all of Kenya, but we only want to process a single project, so that's supplied as the boundary. The footprint data was already filtered using ogr2ogr, and the project ID is used as the prefix for the output files.

    PATH/conflator.py -v -x pg:kenya -b 12007-project.geojson -f pg:kenya_footprints -o 12057 -d mapdb -u me

    This is the same except the database is on a remote machine called mapdb and the user needs to be me.

    PATH/conflator.py -t tmsnap -p 8345 -b pg:kenya_foot -o pg:Kenya

    Reads from 3 data sources. The first one is a snapshot of the Tasking Manager database, and we want to use project 8345 as the boundary. The two data sources are prefixed with \"pg\", which defines them as a database URL instead of a file. The database needs to be running locally in this case.

    "},{"location":"osmhighways/","title":"OpenStreetMap Data","text":"

    Being crowd sourced and open to all who want to contribute, OpenStreetMap (OSM) has infinite flexibility in the various tag/values used for metadata. Many of the tags not in common use are ignored by the renderers and routing engines, but still live in the database and data files. You'd really only notice if you're deep in the data, which is the key to good conflation.

    The features in OSM come from a wide variety of sources. Mobile apps, imports, satellite imagery. Often features traced from imagery are lacking any tags beyond building=yes or highway=track, which we hope to improve on by conflating with other datasets.

    "},{"location":"osmhighways/#data-janitor","title":"Data Janitor","text":"

    Being a data janitor is an important, if rather boring and tedious, task. Bugs in the data can lead to navigation problems at the very least. An accurate and detailed map is a thing of beauty, and often OSM gets really close.

    Unfortunately to conflate OSM data with external data sources, it needs to be cleaned up. Normally it gets cleaned up by the mapper, who has to manually review and edit the tags. Since the highway name is an important item used to confirm a near match in geometry, too much variety can make this a slow process.

    This project has an osmhighways.py program that is used to clean up some of the problems, like deleting unnecessary tags and fixing the name vs reference number problem. Deleting all the bogus tags reduces the data size, which is a benefit. It also extracts only highway linestrings, giving a clean dataset for conflating geometries.

    "},{"location":"osmhighways/#old-imports","title":"Old Imports","text":"

    OpenStreetMap (OSM) has a past history of imports, often done way back when OSM had little highway data. This was a way to bootstrap navigation, and it mostly worked.

    "},{"location":"osmhighways/#tiger","title":"TIGER","text":"

    Since it was publicly available, the data used by the US Census Bureau was imported around 2007. The data is of varying quality, but was better than nothing. The OSM community has been cleaning up the mess ever since. More information on the TIGER fixup can be found here.

    A small example of the tags added from TIGER, all of which can be deleted:

        <tag k=\"tiger:name_base\" v=\"75th\"/>\n    <tag k=\"tiger:name_base_1\" v=\"75th\"/>\n    <tag k=\"tiger:name_direction_prefix\" v=\"N\"/>\n    <tag k=\"tiger:name_type\" v=\"St\"/>\n    <tag k=\"tiger:name_type_1\" v=\"St\"/>\n    <tag k=\"tiger:cfcc\" v=\"A41\"/>\n    <tag k=\"tiger:reviewed\" v=\"no\"/>\n

    I don't think I've ever seen a tiger:reviewed=yes tag.

    "},{"location":"osmhighways/#motor-vehicle-use-map-mvum","title":"Motor Vehicle Use Map (MVUM)","text":"

    The MVUM data is highways in national forests, so it is useful in remote areas not always in TIGER, or in TIGER but completely wrong. I've seen roads in TIGER that don't actually exist. The MVUM data is better quality, as much of it was mapped by ground-truthing. It has useful data fields, like whether a high clearance vehicle is needed, what the surface is, and other access data like whether ATVs are allowed.

    The MVUM data can be downloaded from https://data.fs.usda.gov/geodata/edw/edw_resources/shp/S_USA.Road_MVUM.zip

    "},{"location":"osmhighways/#clipping","title":"Clipping","text":"

    To support conflation, even OSM data needs to be chopped into smaller pieces. While osmium and osmfilter could do this, I've had problems with those tools when the task polygon is small. The osmhighways.py program also clips files. Since it's OSM data, we can't really use shapely or geopandas, just osmium. It's a bit slow, being pure Python.

    "},{"location":"trails/","title":"National Park Service Trails","text":"

    This processes both the National Park Service trails dataset and the National Forest Service trail datasets. The schema of the two datasets is very similar. One of the differences is that Park Service trails get two default tags in the output file, bicycle=no and motor_vehicle=no. These default tags are documented here.

    This dataset is available in a variety of formats from the ArcGIS Hub.

    "},{"location":"trails/#processed-fields","title":"Processed Fields","text":"

    These are the fields extracted from the data that are converted to OpenStreetMap syntax so they can be conflated.

    • OBJECTID becomes id
    • TRLNAME becomes name
    • TRLCLASS becomes sac_scale
    • TRLUSE becomes yes for horse, bicycle, atv, etc...
    • TRLALTNAME becomes alt_name
    • SEASONAL becomes seasonal
    • MAINTAINER becomes operator
    • TRLSURFACE becomes surface
    "},{"location":"trails/#dropped-fields","title":"Dropped Fields","text":"

    These fields are all ignored, and are dropped from the output file.

    • MAPLABEL
    • TRLSTATUS
    • TRLTYPE
    • PUBLICDISP
    • DATAACCESS
    • ACCESSNOTE
    • ORIGINATOR
    • UNITCODE
    • UNITNAME
    • UNITTYPE
    • GROUPCODE
    • GROUPNAME
    • REGIONCODE
    • CREATEDATE
    • EDITDATE
    • LINETYPE
    • MAPMETHOD
    • MAPSOURCE
    • SOURCEDATE
    • XYACCURACY
    • GEOMETRYID
    • FEATUREID
    • FACLOCID
    • FACASSETID
    • IMLOCID
    • OBSERVABLE
    • ISEXTANT
    • OPENTOPUBL
    • ALTLANGNAM
    • ALTLANG
    • NOTES
    "},{"location":"trails/#national-forest-service-trails","title":"National Forest Service Trails","text":"

    The US Forest Service makes much of their data publicly accessible, so it's been a source for imports for a long time. There is a nice detailed wiki page on the Forest Service Data. The conversion process handles most of the implementation details.

    "},{"location":"trails/#keep-fields","title":"Keep Fields","text":"

    The two primary fields are TRAIL_NO, which is used for the ref:usfs tags, and TRAIL_NAME, which is the name of the trail. In addition to these, the access-related fields described below are also converted.

    "},{"location":"trails/#the-5-variations","title":"The 5 Variations","text":"

    For many of the feature classes, there are 5 variations on each one, which are used for access.

    • Managed: Usage allowed and managed by the forest service
    • Accepted: Usage is accepted year round
    • Accepted/Discouraged: Usage is accepted, but discouraged
    • Restricted: Usage is restricted
    • Discouraged: Usage is discouraged

    These are converted to the appropriate value.

    • Managed* sets the keyword to designated
    • Accepted* sets the keyword to yes
    • Restricted* sets the keyword to no
    • Discouraged* sets the keyword to discouraged
    • Accepted/Discouraged* sets the keyword to permissive

    Many of the values for these are NULL, so they are ignored when generating the output file. If a value exists, it's either a Y or an N, which is used to set the value. For example, \"SNOWMOBILE\": \"Y\" becomes snowmobile=yes in the output file. A small sketch of this conversion follows the list below.

    • PACK_SADDLE_ becomes horse=
    • BICYCLE_ becomes bicycle=
    • MOTORCYCLE_ becomes motorcycle=
    • ATV_ becomes atv=
    • FOURWD_ becomes 4wd_only=
    • SNOWMOBILE_ becomes snowmobile=
    • SNOWSHOE_ becomes snowshoe=
    • XCOUNTRY_SKI_ becomes ski=
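
    A sketch of that conversion, assuming the dataset field names carry the suffixes described above; only the simple Y/N case is shown, while the Managed/Accepted/Restricted variations would map to designated, yes, no, etc. as listed earlier:

    ACCESS_FIELDS = {'PACK_SADDLE_': 'horse', 'BICYCLE_': 'bicycle', 'MOTORCYCLE_': 'motorcycle', 'ATV_': 'atv', 'FOURWD_': '4wd_only', 'SNOWMOBILE_': 'snowmobile', 'SNOWSHOE_': 'snowshoe', 'XCOUNTRY_SKI_': 'ski'}\n\ndef convert_access(field: str, value: str):\n    # 'SNOWMOBILE_MANAGED', 'Y' -> ('snowmobile', 'yes'); NULL values are skipped by the caller\n    for prefix, key in ACCESS_FIELDS.items():\n        if field.startswith(prefix):\n            return key, 'yes' if value == 'Y' else 'no'\n    return None\n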

    Currently these fields appear to be empty, but that may change in the future.

    • SNOWCOACH_SNOWCAT_
    • E_BIKE_CLASS1_
    • E_BIKE_CLASS2_
    • E_BIKE_CLASS3_

    This field is ignored as it's assumed the trail is accessible by hikers.

    • HIKER_PEDESTRIAN_
    "},{"location":"trails/#dropped-fields_1","title":"Dropped Fields","text":"

    These fields are dropped as unnecessary for OSM. Many only have a NULL value anyway, so they are useless.

    • MOTOR_WATERCRAFT_
    • NONMOTOR_WATERCRAFT_
    • GIS_MILES
    • Geometry Column
    • TRAIL_TYPE
    • TRAIL_CN
    • BMP
    • EMP
    • SEGMENT_LENGTH
    • ADMIN_ORG
    • MANAGING_ORG
    • SECURITY_ID
    • ATTRIBUTESUBSET
    • NATIONAL_TRAIL_DESIGNATION
    • TRAIL_CLASS
    • ACCESSIBILITY_STATUS
    • TRAIL_SURFACE
    • SURFACE_FIRMNESS
    • TYPICAL_TRAIL_GRADE
    • TYPICAL_TREAD_WIDTH
    • MINIMUM_TRAIL_WIDTH
    • TYPICAL_TREAD_CROSS_SLOPE
    • SPECIAL_MGMT_AREA
    • TERRA_BASE_SYMBOLOGY
    • MVUM_SYMBOL
    • TERRA_MOTORIZED
    • SNOW_MOTORIZED
    • WATER_MOTORIZED
    • ALLOWED_TERRA_USE
    • ALLOWED_SNOW_USE
    "},{"location":"trails/#options","title":"Options","text":"
    -h, --help            show this help message and exit\n-v, --verbose         verbose output\n-i INFILE, --infile INFILE input data file\n-c, --convert         Convert feature to OSM feature\n-o OUTFILE, --outfile OUTFILE Output GeoJson file\n
    "},{"location":"usgs/","title":"US Topographical Data","text":""},{"location":"usgs/#us-topographical-trails","title":"US Topographical Trails","text":"
    • OBJECTID
    • permanenti
    • name
    • namealtern
    • trailnumbe
    • trailnum_1
    • sourcefeat
    • sourcedata
    • sourceda_1
    • sourceorig
    • loaddate
    • trailtype
    • hikerpedes
    • bicycle
    • packsaddle
    • atv
    • motorcycle
    • ohvover50i
    • snowshoe
    • crosscount
    • dogsled
    • snowmobile
    • nonmotoriz
    • motorizedw
    • primarytra
    • nationaltr
    • lengthmile
    • networklen
    • SHAPE_Leng
    "},{"location":"usgs/#us-topographical-highways","title":"US Topographical Highways","text":"
    • OBJECTID
    • permanent_
    • source_fea
    • source_dat
    • source_d_1
    • source_ori
    • loaddate
    • interstate
    • us_route
    • state_rout
    • county_rou
    • federal_la
    • stco_fipsc
    • tnmfrc
    • name
    • mtfcc_code
    • intersta_1
    • intersta_2
    • intersta_3
    • us_route_a
    • us_route_b
    • us_route_c
    • state_ro_1
    • state_ro_2
    • state_ro_3
    • SHAPE_Leng
    "},{"location":"utilities/","title":"Utility Programs","text":"

    To conflate external datasets with OSM, the external data needs to be converted to the OSM tagging schema. Otherwise comparing tags gets very convoluted. Since every dataset uses a different schema, a few utility programs for converting external datasets are included. Currently the only datasets are for highways. These datasets are available from the USDA, and have an appropriate license to use with OpenStreetMap. Indeed, some of this data has already been imported. The files are available from the FSGeodata Clearinghouse.

    Most of the fields in the dataset aren't needed for OSM, only the reference number if it has one, and the name. Most of these highways are already in OSM, but it's a bit of a mess, and mostly unvalidated. Most of the problems are related to the TIGER import in 2007. So the goal of these utilities is to aid the TIGER fixup work by updating or adding the name and a reference number. These utilities prepare the dataset for conflation.

    There are other fields in the datasets we might want, like surface type, is it 4wd only, etc... but often the OSM data is more up to date. And to really get that right, you need to ground truth it.

    "},{"location":"utilities/#mvumpy","title":"mvum.py","text":"

    This converts the Motor Vehicle Use Map (MVUM) dataset that contains data on highways more suitable for off-road vehicles. Some require specialized off-road vehicles like a UTV or ATV. The data in OSM for these roads is really poor. Often the reference number is wrong, or lacks the suffix. We assume the USDA data is correct when it comes to name and reference number, and this will get handled later by conflation.

    "},{"location":"utilities/#roadcorepy","title":"roadcore.py","text":"

    This converts the Road Core vehicle map. This contains data on all highways in a national forest. It's similar to the MVUM dataset.

    "},{"location":"utilities/#trailspy","title":"trails.py","text":"

    This converts the NPSPublish Trail dataset. These are hiking trails not open to motor vehicles. Currently much of this dataset has empty fields, but the trail name and reference number are useful. This utility supports the OpenStreetMap US Trails Initiative.

    "},{"location":"utilities/#usgspy","title":"usgs.py","text":"

    This converts the raw data used to print topographical maps in the US. This obviously is a direct source when it comes to names if you want to be accurate, although things do change over time, so you still have to validate it all. The files are available from the National Map. I use the Shapefiles, as the different categories are in separate files inside the zip. Each one covers a 7.5-minute quad on a topo map. These have to be merged together into a single file to be practical.

    "},{"location":"utilities/#osmhighwayspy","title":"osmhighways.py","text":"

    On the OSM wiki, there is a list of incorrect tagging for forest highway names. Basically the name shouldn't be something like \"Forest Service Road 123.4A\". That's actually a reference number, not a name. This is primarily a problem with existing OSM data. These would all have to get manually fixed when validating in JOSM, so this program automates the process so you only have to validate, and not edit, the feature. This also extracts only highway linestrings, so it is used to create the OSM dataset for conflation. Since the other external datasets also correctly use name, ref, and ref:usfs, this simplifies conflation. Otherwise the algorithm would get very complicated and hard to maintain.

    "},{"location":"utilities/#geojson2polypy","title":"geojson2poly.py","text":"

    This is a very simple utility to convert a GeoJson boundary Multipolygon into an Osmosis poly file. This can be used with osmium, or osmconvert to make data extracts.

    "},{"location":"wiki_redirect/","title":"OSM RawData","text":"

    Please see the docs page at: https://hotosm.github.io/conflator/

    "},{"location":"zion/","title":"Analyzing Zion National Park Trails","text":"

    As an aid to debugging my conflation software, I decided to use Zion National Park trail data. This involved two external datasets, USGS vector topographical maps and the National Park Service trails dataset. The topographical maps are in ShapeFile format, the NPS trails are in GeoJson.

    The topographical dataset has many more attributes than the NPS dataset. For example, the topo dataset contains access information, which is one of the goals of the Trail Access Project. One of the details I noticed was the use of a value of designated instead of yes when the trail is in an official source. There are multiple access types (horse, bicycle, etc.); having them all be no might be useless data, as that could be assumed when access isn't explicitly allowed.

    \"properties\": {\n    \"highway\": \"path\",\n    \"source\": \"National Park Service\",\n    \"bicycle\": \"no\",\n    \"atv\": \"no\",\n    \"horse\": \"designated\",\n    \"motorcycle\": \"no\",\n    \"snowmobile\": \"no\"\n    },\n
    "},{"location":"zion/#conflating-with-openstreetmap","title":"Conflating with OpenStreetMap","text":"

    One big difference is that the OpenStreetMap dataset has many more features tagged with highway than the other datasets. OSM has much more detail: campground loop roads, service roads, etc.

    Topo trails:

    • Coalpits Wash Trail (official)
    • Dalton Wash Trail (BLM ?)
    • Huber Wash Trail (not sure)
    • Left Fork North Creek Trail aka Subway (official)

    The Subway (Bottom) in Topo and Left Fork North Creek Trail in OSM

    Pa'rus Trail is same in topo and nps, not in OSM.

    Deertrap Mountain Trail, or Cable Mountain.

    • nps:COMMENT=062904-GPSed for cultural projects coverage
    • nps:EDIT_DATE=082004
    • nps:ED_COMMENT=063004-removed spikes from arc
    • nps:MILES=0.182262

    "}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 4873a54..bb7468c 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,87 +2,92 @@ https://www.hotosm.org/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/CHANGELOG/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/LICENSE/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/about/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/api/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/calculations/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/conflation/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/formats/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/highways/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/mvum/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/odkconflation/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/osm-merge/ - 2024-09-28 + 2024-10-06 + daily + + + https://www.hotosm.org/osmhighways/ + 2024-10-06 daily https://www.hotosm.org/trails/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/usgs/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/utilities/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/wiki_redirect/ - 2024-09-28 + 2024-10-06 daily https://www.hotosm.org/zion/ - 2024-09-28 + 2024-10-06 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index cf1cccfd599f39f1cf4688b6de1f7d2539e9384f..68985029ae3591144d4e2a15cde995e1addf14da 100644 GIT binary patch literal 328 zcmV-O0k{4iiwFo42?J*W|8r?{Wo=<_E_iKh0L_-mO2jY}hWGOnCHpp=K}4m^AVVF6 zVO*$diD}w~$)zUg)bZ&}FG#m;6DK@Zv~`KGHFm5C6Nl#_PLo?iR#Aw-U>c2) zNyEtC;%1x1c;nP)lHp--sin>en&fymTxyB)%2Y*`X18&=iL-T(%(aD#Hbiq)TlQI0M96E77nKPD z|5E&cXa9JAI+Vxf130qd)4n{tl;F6?hv_^#s#btwqD0b85+8=o!Gklpp4h;l)y68~ z;ayoHc+-