docs: fix ref to conflator. modules to osm_merge.conflator

hotosm · Aug 6, 2024 · bb3e4a8 · bb3e4a8
1 parent 033ad08
commit bb3e4a8
Show file tree

Hide file tree

Showing 2 changed files with 66 additions and 78 deletions.
diff --git a/docs/api.md b/docs/api.md
@@ -2,13 +2,12 @@
 
 ## config.py
 
-::: conflator.conflateBuildings
+::: osm_merge.conflator.conflateBuildings
 options:
 show_source: false
 heading_level: 3
 
-::: conflator.conflatePOI
+::: osm_merge.conflator.conflatePOI
 options:
 show_source: false
 heading_level: 3
-
diff --git a/osm_merge/conflateBuildings.py b/osm_merge/conflateBuildings.py
@@ -14,44 +14,37 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
-    
+
 import argparse
 import logging
 import sys
-import os
-from sys import argv
-from geojson import Point, Feature, FeatureCollection, dump, Polygon
+
 import geojson
-import psycopg2
-from shapely.geometry import shape, Polygon, mapping
-import shapely
-from shapely import wkt, wkb
-import xmltodict
-from progress.bar import Bar, PixelBar
-from progress.spinner import PixelSpinner
 from codetiming import Timer
-import concurrent.futures
 from cpuinfo import get_cpu_info
-from haversine import haversine, Unit
-from thefuzz import fuzz, process
-from osm_rawdata.postgres import uriParser, DatabaseAccess
-# from conflator.geosupport import GeoSupport
+from geojson import Feature, FeatureCollection, Polygon
+
+# from osm_merge.geosupport import GeoSupport
 from geosupport import GeoSupport
+from osm_rawdata.postgres import uriParser
+from shapely import wkb
+from shapely.geometry import Polygon, shape
 
 # Instantiate logger
 log = logging.getLogger(__name__)
 
 # The number of threads is based on the CPU cores
 info = get_cpu_info()
-cores = info['count']
+cores = info["count"]
+
 
 class ConflateBuildings(object):
-    def __init__(self,
-                 dburi: str = None,
-                 boundary: Polygon = None,
-                 ):
-        """
-        This class conflates data that has been imported into a postgres
+    def __init__(
+        self,
+        dburi: str = None,
+        boundary: Polygon = None,
+    ):
+        """This class conflates data that has been imported into a postgres
         database using the Underpass raw data schema.
 
         Args:
@@ -70,20 +63,18 @@ def __init__(self,
         self.view = "ways_poly"
         self.filter = list()
 
-    def addSourceFilter(self,
-                        source: str,
-                        ):
-        """
-        Add to a list of suspect bad source datasets
-
-        """
+    def addSourceFilter(
+        self,
+        source: str,
+    ):
+        """Add to a list of suspect bad source datasets"""
         self.filter.append(source)
 
-    def overlapDB(self,
-                dburi: str,
-                ):
-        """
-        Conflate buildings where all the data is in the same postgres database
+    def overlapDB(
+        self,
+        dburi: str,
+    ):
+        """Conflate buildings where all the data is in the same postgres database
         using the Underpass raw data schema.
 
         Args:
@@ -94,95 +85,94 @@ def overlapDB(self,
         timer = Timer(text="conflateData() took {seconds:.0f}s")
         timer.start()
         # Find duplicate buildings in the same database
-        #sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
-        #sql = "SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1"
+        # sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
+        # sql = "SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1"
         # Make a new postgres VIEW of all overlapping or touching buildings
-        #log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...")
-        #print(sql)
+        # log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...")
+        # print(sql)
         # Views must be dropped in the right order
-        sql = f"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;"
+        sql = (
+            "DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;"
+        )
         result = self.db.queryDB(sql)
 
         if self.boundary:
             self.db.clipDB(self.boundary)
 
-        log.debug(f"Clipping OSM database")
+        log.debug("Clipping OSM database")
         ewkt = shape(self.boundary)
         uri = uriParser(dburi)
         log.debug(f"Extracting OSM subset from \"{uri['dbname']}\"")
         sql = f"CREATE TABLE osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom) AND tags->>'building' IS NOT NULL"
         # print(sql)
         result = self.db.queryDB(sql)
 
-        sql = f"CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL"
+        sql = "CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL"
         print(sql)
         result = self.db.queryDB(sql)
 
     def cleanDuplicates(self):
-        """
-        Delete the entries from the duplicate building view.
+        """Delete the entries from the duplicate building view.
 
         Returns:
             (FeatureCollection): The entries from the datbase table
         """
-        log.debug(f"Removing duplicate buildings from ways_view")
-        sql = f"DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)"
+        log.debug("Removing duplicate buildings from ways_view")
+        sql = "DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)"
 
         result = self.db.queryDB(sql)
         return True
 
     def getNew(self):
-        """
-        Get only the new buildings
+        """Get only the new buildings
 
         Returns:
             (FeatureCollection): The entries from the datbase table
         """
-        sql = f"SELECT osm_id,geom,tags FROM ways_view"
+        sql = "SELECT osm_id,geom,tags FROM ways_view"
         result = self.db.queryDB(sql)
         features = list()
         for item in result:
             # log.debug(item)
-            entry = {'osm_id': item[0]}
+            entry = {"osm_id": item[0]}
             entry.update(item[2])
             geom = wkb.loads(item[1])
             features.append(Feature(geometry=geom, properties=entry))
 
         log.debug(f"{len(features)} new features found")
         return FeatureCollection(features)
 
-    def findHighway(self,
-                    feature: Feature,
-                    ):
-        """
-        Find the nearest highway to a feature
+    def findHighway(
+        self,
+        feature: Feature,
+    ):
+        """Find the nearest highway to a feature
 
         Args:
             feature (Feature): The feature to check against
         """
         pass
 
     def getDuplicates(self):
-        """
-        Get the entries from the duplicate building view.
+        """Get the entries from the duplicate building view.
 
         Returns:
             (FeatureCollection): The entries from the datbase table
         """
-        sql = f"SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view"
+        sql = "SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view"
         result = self.db.queryDB(sql)
         features = list()
         for item in result:
-            #log.debug(item)
+            # log.debug(item)
             # First building identified
-            entry = {'area': float(item[0]), 'id': int(item[1])}
+            entry = {"area": float(item[0]), "id": int(item[1])}
             geom = wkb.loads(item[2])
             entry.update(item[3])
             features.append(Feature(geometry=geom, properties=entry))
 
             # Second building identified
-            entry = {'area': float(item[0]), 'id': int(item[4])}
-            entry['id'] = int(item[4])
+            entry = {"area": float(item[0]), "id": int(item[4])}
+            entry["id"] = int(item[4])
             geom = wkb.loads(item[5])
             entry.update(item[6])
             # FIXME: Merge the tags from the buildings into the OSM feature
@@ -192,6 +182,7 @@ def getDuplicates(self):
         log.debug(f"{len(features)} duplicate features found")
         return FeatureCollection(features)
 
+
 def main():
     """This main function lets this class be run standalone by a bash script"""
     parser = argparse.ArgumentParser(
@@ -205,8 +196,7 @@ def main():
     parser.add_argument("-v", "--verbose", action="store_true", help="verbose output")
     parser.add_argument("-d", "--dburi", required=True, help="Source Database URI")
     parser.add_argument("-o", "--osmuri", required=True, help="OSM Database URI")
-    parser.add_argument("-b", "--boundary", required=True,
-                        help="Boundary polygon to limit the data size")
+    parser.add_argument("-b", "--boundary", required=True, help="Boundary polygon to limit the data size")
     # parser.add_argument("-o", "--outfile", help="Post conflation output file")
 
     args = parser.parse_args()
@@ -216,33 +206,32 @@ def main():
         log.setLevel(logging.DEBUG)
         ch = logging.StreamHandler(sys.stdout)
         ch.setLevel(logging.DEBUG)
-        formatter = logging.Formatter(
-            "%(threadName)10s - %(name)s - %(levelname)s - %(message)s"
-        )
+        formatter = logging.Formatter("%(threadName)10s - %(name)s - %(levelname)s - %(message)s")
         ch.setFormatter(formatter)
         log.addHandler(ch)
 
-    file = open(args.boundary, 'r')
+    file = open(args.boundary, "r")
     boundary = geojson.load(file)
-    if 'features' in boundary:
-        poly = boundary['features'][0]['geometry']
+    if "features" in boundary:
+        poly = boundary["features"][0]["geometry"]
     else:
-        poly = boundary['geometry']
+        poly = boundary["geometry"]
     cdb = ConflateBuildings(args.dburi, poly)
     cdb.overlapDB(args.osmuri)
     features = cdb.getDuplicates()
 
     # FIXME: These are only for debugging
-    file = open("foo.geojson", 'w')
+    file = open("foo.geojson", "w")
     geojson.dump(features, file)
-    log.info(f"Wrote foo.geojson for duplicates")
+    log.info("Wrote foo.geojson for duplicates")
 
     cdb.cleanDuplicates()
     features = cdb.getNew()
-    file = open("bar.geojson", 'w')
+    file = open("bar.geojson", "w")
     geojson.dump(features, file)
 
-    log.info(f"Wrote bar.geojson for new buildings")
+    log.info("Wrote bar.geojson for new buildings")
+
 
 if __name__ == "__main__":
     """This is just a hook so this file can be run standlone during development."""