Skip to content

Commit

Permalink
fix: conflate buildings from external datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
rsavoye committed Oct 28, 2023
1 parent fe2f3d8 commit 05634ce
Showing 1 changed file with 96 additions and 37 deletions.
133 changes: 96 additions & 37 deletions conflator/conflateBuildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ def __init__(self,
self.db = GeoSupport(dburi)
self.boundary = boundary
self.view = "ways_poly"
self.filter = list()

def addSourceFilter(self,
source: str,
):
"""
Add to a list of suspect bad source datasets
"""
self.filter.append(source)

def overlapDB(self,
dburi: str,
Expand All @@ -81,68 +91,105 @@ def overlapDB(self,
This is not fast for large areas!
"""
if self.boundary:
self.db.clipDB(self.boundary)

timer = Timer(text="conflateData() took {seconds:.0f}s")
timer.start()
# Find duplicate buildings in the same database
sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
#sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
#sql = "SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1"
# Make a new postgres VIEW of all overlapping or touching buildings
log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...")
## result = self.db.queryDB(sql)

sql = "SELECT area,id1,geom1,id2,geom2 FROM overlap_view"
## result = self.db.queryDB(sql)
result = list()
features = list()
for item in result:
# First building identified
entry = {'area': float(item[0]), 'id': int(item[1])}
geom = wkb.loads(item[2])
features.append(Feature(geometry=geom, properties=entry))
# Second building identified
entry['id'] = int(item[3])
geom = wkb.loads(item[4])
features.append(Feature(geometry=geom, properties=entry))
#log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...")
#print(sql)
# Views must be dropped in the right order
sql = f"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;"
result = self.db.queryDB(sql)

log.debug(f"{len(features)} overlapping features found")
if self.boundary:
self.db.clipDB(self.boundary)

log.debug(f"Clipping OSM database")
ewkt = shape(self.boundary)
uri = uriParser(dburi)
log.debug(f"Extracting OSM subset from \"{uri['dbname']}\"")
sql = f"DROP VIEW IF EXISTS osm_view CASCADE;CREATE VIEW osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom)"
sql = f"CREATE TABLE osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom) AND tags->>'building' IS NOT NULL"
# print(sql)
result = self.db.queryDB(sql)

sql = f"DROP VIEW IF EXISTS dups_view;CREATE VIEW dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, osm_view AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)"
sql = f"CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL"
print(sql)
result = self.db.queryDB(sql)

sql = "SELECT area,id1,geom1,id2,geom2 FROM dups_view"
def cleanDuplicates(self):
"""
Delete the entries from the duplicate building view.
Returns:
(FeatureCollection): The entries from the datbase table
"""
log.debug(f"Removing duplicate buildings from ways_view")
sql = f"DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)"

result = self.db.queryDB(sql)
return True

log.debug(f"{len(result)} duplicates found")
def getNew(self):
"""
Get only the new buildings
Returns:
(FeatureCollection): The entries from the datbase table
"""
sql = f"SELECT osm_id,geom,tags FROM ways_view"
result = self.db.queryDB(sql)
features = list()
for item in result:
# log.debug(item)
entry = {'osm_id': item[0]}
entry.update(item[2])
geom = wkb.loads(item[1])
features.append(Feature(geometry=geom, properties=entry))

log.debug(f"{len(features)} new features found")
return FeatureCollection(features)

def findHighway(self,
feature: Feature,
):
"""
Find the nearest highway to a feature
Args:
feature (Feature): The feature to check against
"""
pass

def getDuplicates(self):
"""
Get the entries from the duplicate building view.
Returns:
(FeatureCollection): The entries from the datbase table
"""
sql = f"SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view"
result = self.db.queryDB(sql)
features = list()
for item in result:
#log.debug(item)
# First building identified
entry = {'area': float(item[0]), 'id': int(item[1])}
# FIXME: Do we want to filter by the size of the overlap ?
# It's not exactly a reliable number since buildings are
# different sizes.
# if entry['area'] < 0.04:
# log.debug(f"FOO: {entry['area']}")
# continue
geom = wkb.loads(item[2])
entry.update(item[3])
features.append(Feature(geometry=geom, properties=entry))

# Second building identified
entry['id'] = int(item[3])
geom = wkb.loads(item[4])
entry = {'area': float(item[0]), 'id': int(item[4])}
entry['id'] = int(item[4])
geom = wkb.loads(item[5])
entry.update(item[6])
# FIXME: Merge the tags from the buildings into the OSM feature
# entry.update(item[3])
features.append(Feature(geometry=geom, properties=entry))

# FIXME: debug only!
bar = open("foo.geojson", 'w')
geojson.dump(FeatureCollection(features), bar)
log.debug(f"{len(features)} duplicate features found")
return FeatureCollection(features)

def main():
Expand Down Expand Up @@ -183,7 +230,19 @@ def main():
poly = boundary['geometry']
cdb = ConflateBuildings(args.dburi, poly)
cdb.overlapDB(args.osmuri)
# log.info(f"Wrote {args.outfile}")
features = cdb.getDuplicates()

# FIXME: These are only for debugging
file = open("foo.geojson", 'w')
geojson.dump(features, file)
log.info(f"Wrote foo.geojson for duplicates")

cdb.cleanDuplicates()
features = cdb.getNew()
file = open("bar.geojson", 'w')
geojson.dump(features, file)

log.info(f"Wrote bar.geojson for new buildings")

if __name__ == "__main__":
"""This is just a hook so this file can be run standlone during development."""
Expand Down

0 comments on commit 05634ce

Please sign in to comment.