diff --git a/conflator/conflateBuildings.py b/conflator/conflateBuildings.py index 4b86dde..467ea17 100755 --- a/conflator/conflateBuildings.py +++ b/conflator/conflateBuildings.py @@ -68,6 +68,16 @@ def __init__(self, self.db = GeoSupport(dburi) self.boundary = boundary self.view = "ways_poly" + self.filter = list() + + def addSourceFilter(self, + source: str, + ): + """ + Add to a list of suspect bad source datasets + + """ + self.filter.append(source) def overlapDB(self, dburi: str, @@ -81,68 +91,105 @@ def overlapDB(self, This is not fast for large areas! """ - if self.boundary: - self.db.clipDB(self.boundary) - timer = Timer(text="conflateData() took {seconds:.0f}s") timer.start() # Find duplicate buildings in the same database - sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)" + #sql = f"DROP VIEW IF EXISTS overlap_view;CREATE VIEW overlap_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, {self.view} AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)" #sql = "SELECT * FROM (SELECT ways_view.id, tags, ROW_NUMBER() OVER(PARTITION BY geom ORDER BY ways_view.geom asc) AS Row, geom FROM ONLY ways_view) dups WHERE dups.Row > 1" # Make a new postgres VIEW of all overlapping or touching buildings - log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...") - ## result = self.db.queryDB(sql) - - sql = "SELECT area,id1,geom1,id2,geom2 FROM overlap_view" - ## result = self.db.queryDB(sql) - result = list() - features = list() - for item in result: - # First building identified - entry = {'area': float(item[0]), 'id': int(item[1])} - geom = wkb.loads(item[2]) - features.append(Feature(geometry=geom, properties=entry)) - # Second building identified - entry['id'] = int(item[3]) - geom = wkb.loads(item[4]) - features.append(Feature(geometry=geom, properties=entry)) + #log.info(f"Looking for overlapping buildings in \"{self.uri['dbname']}\", this make take awhile...") + #print(sql) + # Views must be dropped in the right order + sql = f"DROP TABLE IF EXISTS dups_view CASCADE; DROP TABLE IF EXISTS osm_view CASCADE;DROP TABLE IF EXISTS ways_view CASCADE;" + result = self.db.queryDB(sql) - log.debug(f"{len(features)} overlapping features found") + if self.boundary: + self.db.clipDB(self.boundary) log.debug(f"Clipping OSM database") ewkt = shape(self.boundary) uri = uriParser(dburi) log.debug(f"Extracting OSM subset from \"{uri['dbname']}\"") - sql = f"DROP VIEW IF EXISTS osm_view CASCADE;CREATE VIEW osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom)" + sql = f"CREATE TABLE osm_view AS SELECT osm_id,tags,geom FROM dblink('dbname={uri['dbname']}', 'SELECT osm_id,tags,geom FROM ways_poly') AS t1(osm_id int, tags jsonb, geom geometry) WHERE ST_CONTAINS(ST_GeomFromEWKT('SRID=4326;{ewkt}'), geom) AND tags->>'building' IS NOT NULL" + # print(sql) result = self.db.queryDB(sql) - sql = f"DROP VIEW IF EXISTS dups_view;CREATE VIEW dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g2.osm_id AS id2,g2.geom as geom2 FROM {self.view} AS g1, osm_view AS g2 WHERE ST_OVERLAPS(g1.geom, g2.geom) AND (g1.tags->>'building' IS NOT NULL AND g2.tags->>'building' IS NOT NULL)" + sql = f"CREATE TABLE dups_view AS SELECT ST_Area(ST_INTERSECTION(g1.geom::geography, g2.geom::geography)) AS area,g1.osm_id AS id1,g1.geom as geom1,g1.tags AS tags1,g2.osm_id AS id2,g2.geom as geom2, g2.tags AS tags2 FROM ways_view AS g1, osm_view AS g2 WHERE ST_INTERSECTS(g1.geom, g2.geom) AND g2.tags->>'building' IS NOT NULL" + print(sql) result = self.db.queryDB(sql) - sql = "SELECT area,id1,geom1,id2,geom2 FROM dups_view" + def cleanDuplicates(self): + """ + Delete the entries from the duplicate building view. + + Returns: + (FeatureCollection): The entries from the datbase table + """ + log.debug(f"Removing duplicate buildings from ways_view") + sql = f"DELETE FROM ways_view WHERE osm_id IN (SELECT id1 FROM dups_view)" + result = self.db.queryDB(sql) + return True - log.debug(f"{len(result)} duplicates found") + def getNew(self): + """ + Get only the new buildings + + Returns: + (FeatureCollection): The entries from the datbase table + """ + sql = f"SELECT osm_id,geom,tags FROM ways_view" + result = self.db.queryDB(sql) + features = list() + for item in result: + # log.debug(item) + entry = {'osm_id': item[0]} + entry.update(item[2]) + geom = wkb.loads(item[1]) + features.append(Feature(geometry=geom, properties=entry)) + + log.debug(f"{len(features)} new features found") + return FeatureCollection(features) + + def findHighway(self, + feature: Feature, + ): + """ + Find the nearest highway to a feature + + Args: + feature (Feature): The feature to check against + """ + pass + + def getDuplicates(self): + """ + Get the entries from the duplicate building view. + + Returns: + (FeatureCollection): The entries from the datbase table + """ + sql = f"SELECT area,id1,geom1,tags1,id2,geom2,tags2 FROM dups_view" + result = self.db.queryDB(sql) features = list() for item in result: + #log.debug(item) # First building identified entry = {'area': float(item[0]), 'id': int(item[1])} - # FIXME: Do we want to filter by the size of the overlap ? - # It's not exactly a reliable number since buildings are - # different sizes. - # if entry['area'] < 0.04: - # log.debug(f"FOO: {entry['area']}") - # continue geom = wkb.loads(item[2]) + entry.update(item[3]) features.append(Feature(geometry=geom, properties=entry)) + # Second building identified - entry['id'] = int(item[3]) - geom = wkb.loads(item[4]) + entry = {'area': float(item[0]), 'id': int(item[4])} + entry['id'] = int(item[4]) + geom = wkb.loads(item[5]) + entry.update(item[6]) + # FIXME: Merge the tags from the buildings into the OSM feature + # entry.update(item[3]) features.append(Feature(geometry=geom, properties=entry)) - # FIXME: debug only! - bar = open("foo.geojson", 'w') - geojson.dump(FeatureCollection(features), bar) + log.debug(f"{len(features)} duplicate features found") return FeatureCollection(features) def main(): @@ -183,7 +230,19 @@ def main(): poly = boundary['geometry'] cdb = ConflateBuildings(args.dburi, poly) cdb.overlapDB(args.osmuri) - # log.info(f"Wrote {args.outfile}") + features = cdb.getDuplicates() + + # FIXME: These are only for debugging + file = open("foo.geojson", 'w') + geojson.dump(features, file) + log.info(f"Wrote foo.geojson for duplicates") + + cdb.cleanDuplicates() + features = cdb.getNew() + file = open("bar.geojson", 'w') + geojson.dump(features, file) + + log.info(f"Wrote bar.geojson for new buildings") if __name__ == "__main__": """This is just a hook so this file can be run standlone during development."""