Skip to content

Commit

Permalink
docs: fix ref to conflator. modules to osm_merge.conflator
Browse files Browse the repository at this point in the history
  • Loading branch information
spwoodcock committed Aug 6, 2024
1 parent 033ad08 commit bb3e4a8
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 78 deletions.
5 changes: 2 additions & 3 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

## config.py

::: conflator.conflateBuildings
::: osm_merge.conflator.conflateBuildings
options:
show_source: false
heading_level: 3

::: conflator.conflatePOI
::: osm_merge.conflator.conflatePOI
options:
show_source: false
heading_level: 3

139 changes: 64 additions & 75 deletions osm_merge/conflateBuildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,44 +14,37 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import argparse
import logging
import sys
import os
from sys import argv
from geojson import Point, Feature, FeatureCollection, dump, Polygon

import geojson
import psycopg2
from shapely.geometry import shape, Polygon, mapping
import shapely
from shapely import wkt, wkb
import xmltodict
from progress.bar import Bar, PixelBar
from progress.spinner import PixelSpinner
from codetiming import Timer
import concurrent.futures
from cpuinfo import get_cpu_info
from haversine import haversine, Unit
from thefuzz import fuzz, process
from osm_rawdata.postgres import uriParser, DatabaseAccess
# from conflator.geosupport import GeoSupport
from geojson import Feature, FeatureCollection, Polygon

# from osm_merge.geosupport import GeoSupport
from geosupport import GeoSupport
from osm_rawdata.postgres import uriParser
from shapely import wkb
from shapely.geometry import Polygon, shape

# Instantiate logger
log = logging.getLogger(__name__)

# The number of threads is based on the CPU cores
info = get_cpu_info()
cores = info['count']
cores = info["count"]


class ConflateBuildings(object):
def __init__(self,
dburi: str = None,
boundary: Polygon = None,
):
"""
This class conflates data that has been imported into a postgres
def __init__(
self,
dburi: str = None,
boundary: Polygon = None,
):
"""This class conflates data that has been imported into a postgres
database using the Underpass raw data schema.
Args:
Expand All @@ -70,20 +63,18 @@ def __init__(self,
self.view = "ways_poly"
self.filter = list()

def addSourceFilter(self,
source: str,
):
"""
Add to a list of suspect bad source datasets
"""
def addSourceFilter(
self,
source: str,
):
"""Add to a list of suspect bad source datasets"""
self.filter.append(source)

def overlapDB(self,
dburi: str,
):
"""
Conflate buildings where all the data is in the same postgres database
def overlapDB(
self,
dburi: str,
):
"""Conflate buildings where all the data is in the same postgres database
using the Underpass raw data schema.
Args:
Expand All @@ -94,95 +85,94 @@ def overlapDB(self,
timer = Timer(text="conflateData() took {seconds:.0f}s")
timer.start()
# Find duplicate buildings in the same database
#sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
#sql = "SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1"
# sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
# sql = "SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1"
# Make a new postgres VIEW of all overlapping or touching buildings
#log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...")
#print(sql)
# log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...")
# print(sql)
# Views must be dropped in the right order
sql = f"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;"
sql = (
"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;"
)
result = self.db.queryDB(sql)

if self.boundary:
self.db.clipDB(self.boundary)

log.debug(f"Clipping OSM database")
log.debug("Clipping OSM database")
ewkt = shape(self.boundary)
uri = uriParser(dburi)
log.debug(f"Extracting OSM subset from \"{uri['dbname']}\"")
sql = f"CREATE TABLE osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom) AND tags->>'building' IS NOT NULL"
# print(sql)
result = self.db.queryDB(sql)

sql = f"CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL"
sql = "CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL"
print(sql)
result = self.db.queryDB(sql)

def cleanDuplicates(self):
"""
Delete the entries from the duplicate building view.
"""Delete the entries from the duplicate building view.
Returns:
(FeatureCollection): The entries from the datbase table
"""
log.debug(f"Removing duplicate buildings from ways_view")
sql = f"DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)"
log.debug("Removing duplicate buildings from ways_view")
sql = "DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)"

result = self.db.queryDB(sql)
return True

def getNew(self):
"""
Get only the new buildings
"""Get only the new buildings
Returns:
(FeatureCollection): The entries from the datbase table
"""
sql = f"SELECT osm_id,geom,tags FROM ways_view"
sql = "SELECT osm_id,geom,tags FROM ways_view"
result = self.db.queryDB(sql)
features = list()
for item in result:
# log.debug(item)
entry = {'osm_id': item[0]}
entry = {"osm_id": item[0]}
entry.update(item[2])
geom = wkb.loads(item[1])
features.append(Feature(geometry=geom, properties=entry))

log.debug(f"{len(features)} new features found")
return FeatureCollection(features)

def findHighway(self,
feature: Feature,
):
"""
Find the nearest highway to a feature
def findHighway(
self,
feature: Feature,
):
"""Find the nearest highway to a feature
Args:
feature (Feature): The feature to check against
"""
pass

def getDuplicates(self):
"""
Get the entries from the duplicate building view.
"""Get the entries from the duplicate building view.
Returns:
(FeatureCollection): The entries from the datbase table
"""
sql = f"SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view"
sql = "SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view"
result = self.db.queryDB(sql)
features = list()
for item in result:
#log.debug(item)
# log.debug(item)
# First building identified
entry = {'area': float(item[0]), 'id': int(item[1])}
entry = {"area": float(item[0]), "id": int(item[1])}
geom = wkb.loads(item[2])
entry.update(item[3])
features.append(Feature(geometry=geom, properties=entry))

# Second building identified
entry = {'area': float(item[0]), 'id': int(item[4])}
entry['id'] = int(item[4])
entry = {"area": float(item[0]), "id": int(item[4])}
entry["id"] = int(item[4])
geom = wkb.loads(item[5])
entry.update(item[6])
# FIXME: Merge the tags from the buildings into the OSM feature
Expand All @@ -192,6 +182,7 @@ def getDuplicates(self):
log.debug(f"{len(features)} duplicate features found")
return FeatureCollection(features)


def main():
"""This main function lets this class be run standalone by a bash script"""
parser = argparse.ArgumentParser(
Expand All @@ -205,8 +196,7 @@ def main():
parser.add_argument("-v", "--verbose", action="store_true", help="verbose output")
parser.add_argument("-d", "--dburi", required=True, help="Source Database URI")
parser.add_argument("-o", "--osmuri", required=True, help="OSM Database URI")
parser.add_argument("-b", "--boundary", required=True,
help="Boundary polygon to limit the data size")
parser.add_argument("-b", "--boundary", required=True, help="Boundary polygon to limit the data size")
# parser.add_argument("-o", "--outfile", help="Post conflation output file")

args = parser.parse_args()
Expand All @@ -216,33 +206,32 @@ def main():
log.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
"%(threadName)10s - %(name)s - %(levelname)s - %(message)s"
)
formatter = logging.Formatter("%(threadName)10s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
log.addHandler(ch)

file = open(args.boundary, 'r')
file = open(args.boundary, "r")
boundary = geojson.load(file)
if 'features' in boundary:
poly = boundary['features'][0]['geometry']
if "features" in boundary:
poly = boundary["features"][0]["geometry"]
else:
poly = boundary['geometry']
poly = boundary["geometry"]
cdb = ConflateBuildings(args.dburi, poly)
cdb.overlapDB(args.osmuri)
features = cdb.getDuplicates()

# FIXME: These are only for debugging
file = open("foo.geojson", 'w')
file = open("foo.geojson", "w")
geojson.dump(features, file)
log.info(f"Wrote foo.geojson for duplicates")
log.info("Wrote foo.geojson for duplicates")

cdb.cleanDuplicates()
features = cdb.getNew()
file = open("bar.geojson", 'w')
file = open("bar.geojson", "w")
geojson.dump(features, file)

log.info(f"Wrote bar.geojson for new buildings")
log.info("Wrote bar.geojson for new buildings")


if __name__ == "__main__":
"""This is just a hook so this file can be run standlone during development."""
Expand Down

0 comments on commit bb3e4a8

Please sign in to comment.