diff --git a/conflator/conflator.py b/conflator/conflator.py
index 8647b7c..46b0a8c 100755
--- a/conflator/conflator.py
+++ b/conflator/conflator.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python3
 
-# Copyright (c) 2021, 2022, 2023 Humanitarian OpenStreetMap Team
+# Copyright (c) 2021, 2022, 2023, 2024 Humanitarian OpenStreetMap Team
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
@@ -21,23 +21,29 @@ import os
 from sys import argv
 from osm_fieldwork.osmfile import OsmFile
-from geojson import Point, Feature, FeatureCollection, dump, Polygon
+from geojson import Point, Feature, FeatureCollection, dump, Polygon, load
 import geojson
-import psycopg2
 from shapely.geometry import shape, Polygon, mapping
 import shapely
 from shapely import wkt
-import xmltodict
 from progress.bar import Bar, PixelBar
 from progress.spinner import PixelSpinner
 from osm_fieldwork.convert import escape
+from osm_fieldwork.parsers import ODKParsers
 from osm_rawdata.postgres import PostgresClient, uriParser
+from geosupport import GeoSupport
+# from conflator.geosupport import GeoSupport
 from codetiming import Timer
 import concurrent.futures
 from cpuinfo import get_cpu_info
 from time import sleep
 from haversine import haversine, Unit
 from thefuzz import fuzz, process
+from pathlib import Path
+from spellchecker import SpellChecker
+# from deepdiff import DeepDiff
 
 # Instantiate logger
 log = logging.getLogger(__name__)
@@ -47,90 +53,64 @@
 cores = info['count']
 
+# The sort key function, returns the 'dist' value of an entry:
+def distSort(data: dict):
+    """
+    Args:
+        data (dict): The entry to sort by distance
+
+    Returns:
+        The 'dist' value, used as the sort key
+    """
+    return data['dist']
+
 class Conflator(object):
     def __init__(self,
-                 source: str,
+                 uri: str,
                  boundary: str = None,
                  ):
         """
         Initialize Input data source
 
         Args:
-            source (str): The source URI
+            uri (str): URI for the primary database
             boundary: str = None
 
         Returns:
-            (OdkMerge): An instance of this object
+            (Conflator): An instance of this object
         """
         self.postgres = list()
-        self.source = source
         self.tags = dict()
+        self.boundary = None
         # Distance in meters for conflating with postgis
         self.tolerance = 7
         self.data = dict()
-        self.analyze = ("name", "amenity", "landuse", "cuisine", "tourism", "leisure")
-        # PG: is the same prefix as ogr2ogr
-        # "[user[:password]@][netloc][:port][/dbname]"
-        if source[0:3] == "PG:":
-            uri = uriParser(source[3:])
-            # self.source = "underpass" is not support yet
-            # Each thread needs it's own connection to postgres to avoid problems.
-            for thread in range(0, cores + 1):
-                db = PostgresClient(dbhost=uri['dbhost'], dbname=uri['dbname'], dbuser=uri['dbuser'], dbpass=uri['dbpass'])
-                self.postgres.append(db)
-            if boundary:
-                self.clip(boundary, db)
-        else:
-            log.info("Opening data file: %s" % source)
-            src = open(source, "r")
-            self.data = geojson.load(src)
+        self.analyze = ("building", "name", "amenity", "landuse", "cuisine", "tourism", "leisure")
+        # uri = uriParser(source[3:])
+        # self.source = "underpass" is not supported yet
+        # Each thread needs its own connection to postgres to avoid problems.
+        for thread in range(0, cores + 1):
+            db = GeoSupport(uri)
+            self.postgres.append(db)
         if boundary:
-            self.clip(boundary)
-
-    def clip(self,
-             boundary: Polygon,
-             db: PostgresClient,
-             ):
-        """
-        Clip a data source by a boundary
-
-        Args:
-            boundary (str): The filespec of the project AOI
-            db (PostgresClient): A reference to the existing database connection
-
-        Returns:
-            (bool): If the region was clipped sucessfully
-        """
-        remove = list()
-        if not boundary:
-            return False
-
-        if 'features' in boundary:
-            poly = geom['features'][0]['geometry']
-        else:
-            poly = geom["geometry"]
-        ewkt = shape(poly)
-
-        if len(self.data) > 0:
-            # TODO: FMTM produces data extracts the exact size of the boundary
-            # polygon, so we don't need to clip it. In the future though we
-            # want this to produce a subset from a larger file.
-            for feature in self.data['features']:
-                entry = shapely.from_geojson(str(feature))
-                # if not shapely.contains(ewkt, entry):
-                #     log.debug(f"CONTAINS {entry}")
-                #     del self.data[self.data['features']]
-                pass
-        else:
-            # setup the postgres VIEWs with a dummy SQL query
-            sql = f"SELECT COUNT(osm_id) FROM nodes"
-            result = db.queryLocal(sql, ewkt)
-        return True
+            self.boundary = boundary
+            db.clipDB(boundary)
+
+        # log.info("Opening data file: %s" % source)
+        # toplevel = Path(source)
+        # if toplevel.suffix == ".geojson":
+        #     src = open(source, "r")
+        #     self.data = geojson.load(src)
+        # elif toplevel.suffix == ".osm":
+        #     src = open(source, "r")
+        #     osmin = OsmFile()
+        #     self.data = osmin.loadFile(source) # input file
+        #     if boundary:
+        #         gs = GeoSupport(source)
+        #         # self.data = gs.clipFile(self.data)
 
     def makeNewFeature(self,
                        attrs: dict = None,
                        tags: dict = None,
-                       ):
+                       ) -> dict:
         """
         Create a new feature with optional data
 
@@ -152,227 +132,262 @@ def makeNewFeature(self,
             newf['tags'] = dict()
         return newf
 
-    def conflateFile(self,
-                     feature: dict,
-                     ):
+    def checkTags(self,
+                  feature: Feature,
+                  osm: dict,
+                  ):
         """
-        Conflate a POI against all the features in a GeoJson file
+        Check tags between 2 features.
 
         Args:
-            feature (dict): The feature to conflate
+            feature (Feature): The feature from the external dataset
+            osm (dict): The result of the SQL query
 
         Returns:
-            (dict): The modified feature
+            (int): The number of tag matches
+            (dict): The updated tags
         """
-        # Most smartphone GPS are 5-10m off most of the time, plus sometimes
-        # we're standing in front of an amenity and recording that location
-        # instead of in the building.
-        gps_accuracy = 10
-        # this is the treshold for fuzzy string matching
+        tags = osm['tags']
+        hits = 0
         match_threshold = 80
-        # log.debug(f"conflateFile({feature})")
-        hits = False
-        data = dict()
-        geom = Point((float(feature["attrs"]["lon"]), float(feature["attrs"]["lat"])))
-        wkt = shape(geom)
-        for existing in self.data['features']:
-            id = int(existing['properties']['id'])
-            entry = shapely.from_geojson(str(existing))
-            if entry.geom_type != 'Point':
-                center = shapely.centroid(entry)
-            else:
-                center = entry
-            # dist = shapely.hausdorff_distance(center, wkt)
-            # if 'name' in existing['properties']:
-            #     print(f"DIST1: {dist}, {existing['properties']['name']}")
-            # x = shapely.distance(wkt, entry)
-            # haversine reverses the order of lat & lon from what shapely uses. We
-            # use this as meters is easier to deal with than cartesian coordinates.
-            x1 = (center.coords[0][1], center.coords[0][0])
-            x2 = (wkt.coords[0][1], wkt.coords[0][0])
-            dist = haversine(x1, x2, unit=Unit.METERS)
-            if dist < gps_accuracy:
-                # if 'name' in existing['properties']:
-                #     log.debug(f"DIST2: {dist}")
-                # log.debug(f"Got a Hit! {feature['tags']['name']}")
-                for key,value in feature['tags'].items():
-                    if key in self.analyze:
-                        if key in existing['properties']:
-                            result = fuzz.ratio(value, existing['properties'][key])
-                            if result > match_threshold:
-                                # log.debug(f"Matched: {result}: {feature['tags']['name']}")
-                                existing['properties']['fixme'] = "Probably a duplicate!"
-                                log.debug(f"Got a dup in file!!! {existing['properties']['name'] }")
-                                hits = True
-                                break
-        if hits:
-            version = int(existing['properties']['version'])
-            # coords = feature['geometry']['coordinates']
-            # lat = coords[1]
-            # lon = coords[0]
-            attrs = {'id': id, 'version': version, 'lat': feature['attrs']['lat'], 'lon': feature['attrs']['lon']}
-            tags = existing['properties']
-            tags['fixme'] = "Probably a duplicate!"
-            # Data extracts for ODK Collect
-            del tags['title']
-            del tags['label']
-            if 'building' in tags:
-                return {'attrs': attrs, 'tags': tags, 'refs': list()}
-            return {'attrs': attrs, 'tags': tags}
-        return dict()
-
-    def conflateWay(self,
-                    feature: dict,
-                    dbindex: int,
+        if osm['tags']['dist'] > float(self.tolerance):
+            return 0, osm['tags']
+        for key, value in feature['tags'].items():
+            if key in tags:
+                ratio = fuzz.ratio(value, tags[key])
+                if ratio > match_threshold:
+                    hits += 1
+            else:
+                if key != 'note':
+                    tags[f'old_{key}'] = value
+                tags[key] = value
+
+        return hits, tags
+
+    def conflateFiles(self,
+                      odkspec: str,
+                      osmspec: str,
+                      threshold: int = 10,
                     ):
         """
-        Conflate a POI against all the ways in a postgres view
+        Open the two source files and conflate them.
 
         Args:
-            feature (dict): The feature to conflate
-            dbindex (int): An index into the array of postgres connections
+            odkspec (str): The file of ODK features to conflate
+            osmspec (str): The file with the existing OSM data
+            threshold (int): Threshold for distance calculations
 
         Returns:
-            (dict): The modified feature
+            (list): The conflated output
         """
-        # log.debug(f"conflateWay({feature})")
-        hits = False
-        result = list()
-        geom = Point((float(feature["attrs"]["lon"]), float(feature["attrs"]["lat"])))
-        wkt = shape(geom)
-        for key, value in feature['tags'].items():
-            if key in self.analyze:
-                # Sometimes the duplicate is a polygon, really common for parking lots.
-                cleanval = escape(value)
-                query = f"SELECT osm_id,tags,version,ST_AsText(ST_Centroid(geom)) FROM ways_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\'SRID=4326;{wkt.wkt}\')) < {self.tolerance} AND levenshtein(tags->>'{key}', '{cleanval}') <= 1"
-                # log.debug(query)
-                self.postgres[dbindex].dbcursor.execute(query)
-                try:
-                    result = self.postgres[dbindex].dbcursor.fetchall()
-                except:
-                    result = list()
-                    # log.warning(f"No results at all for {query}")
-                if len(result) > 0:
-                    hits = True
-                    break
-        if hits:
-            log.debug(f"Got a dup in ways!!! {feature['tags']['name']}")
-            # the result is a list from what we specify for SELECT
-            version = int(result[0][2]) + 1
-            attrs = {'id': int(result[0][0]), 'version': version}
-            tags = result[0][1]
-            tags[f'old_{key}'] = value
-            tags['fixme'] = "Probably a duplicate!"
-            geom = mapping(shapely.from_wkt(result[0][3]))
-            refs = list()
-            # FIXME: iterate through the points and find the existing nodes,
-            # which I'm not sure
-            # is possible
-            # SELECT osm_id,tags,version FROM nodes WHERE ST_Contains(geom, ST_GeomFromText('Point(-105.9918636 38.5360821)'));
-            # for i in geom['coordinates'][0]:
-            #     print(f"XXXXX: {i}")
-            return {'attrs': attrs, 'tags': tags, 'refs': refs}
-        return dict()
-
-    def conflateNode(self,
-                     feature: dict,
-                     dbindex: int,
-                     ):
+        odkdata = list()
+        osmdata = list()
+
+        # The collected data from ODK
+        odkpath = Path(odkspec)
+        if odkpath.suffix == '.geojson':
+            log.debug(f"Parsing GeoJson files {odkspec}")
+            odkfile = open(odkspec, 'r')
+            features = geojson.load(odkfile)
+            odkdata = features['features']
+        elif odkpath.suffix == '.osm':
+            log.debug(f"Parsing OSM XML files {odkspec}")
+            osmfile = OsmFile()
+            odkdata = osmfile.loadFile(odkspec)
+        elif odkpath.suffix == ".csv":
+            log.debug(f"Parsing csv files {odkspec}")
+            odk = ODKParsers()
+            for entry in odk.CSVparser(odkspec):
+                odkdata.append(odk.createEntry(entry))
+        elif odkpath.suffix == ".json":
+            log.debug(f"Parsing json files {odkspec}")
+            odk = ODKParsers()
+            for entry in odk.JSONparser(odkspec):
+                odkdata.append(odk.createEntry(entry))
+
+        # The data extract from OSM
+        osmpath = Path(osmspec)
+        if osmpath.suffix == '.geojson':
+            osmfile = open(osmspec, 'r')
+            features = geojson.load(osmfile)
+            osmdata = features['features']
+        elif osmpath.suffix == '.osm':
+            osmfile = OsmFile()
+            osmdata = osmfile.loadFile(osmspec)
+
+        return self.conflateFeatures(odkdata, osmdata, threshold)
+
+    def conflateFeatures(self,
+                         odkdata: list,
+                         osmdata: list,
+                         threshold: int = 1,
+                         spellcheck: bool = True,
+                         ):
         """
-        Conflate a POI against all the nodes in the view
+        Conflate features from ODK against all the features in OSM.
 
         Args:
-            feature (dict): The feature to conflate
-            dbindex (int): An index into the array of postgres connections
+            odkdata (list): The features from ODK to conflate
+            osmdata (list): The existing OSM data
+            threshold (int): Threshold for distance calculations
+            spellcheck (bool): Whether to also spell check string values
 
         Returns:
-            (dict): The modified feature
-        """
-        # log.debug(f"conflateNode({feature})")
-        hits = False
-        geom = Point((float(feature["attrs"]["lon"]), float(feature["attrs"]["lat"])))
-        wkt = shape(geom)
-        result = list()
-        ratio = 1
-        for key,value in feature['tags'].items():
-            if key in self.analyze:
-                # print("%s = %s" % (key, value))
-                # Use a Geography data type to get the answer in meters, which
-                # is easier to deal with than degress of the earth.
-                cleanval = escape(value)
-                query = f"SELECT osm_id,tags,version,ST_AsEWKT(geom) FROM nodes_view WHERE ST_Distance(geom::geography, ST_GeogFromText(\'SRID=4326;{wkt.wkt}\')) < {self.tolerance} AND levenshtein(tags->>'{key}', '{cleanval}') <= {ratio}"
-                # print(query)
-                # FIXME: this currently only works with a local database, not underpass yet
-                self.postgres[dbindex].dbcursor.execute(query)
-                try:
-                    result = self.postgres[dbindex].dbcursor.fetchall()
-                except:
-                    result = list()
-                    # log.warning(f"No results at all for {query}")
-                if len(result) > 0:
-                    hits = True
-                    break
-        if hits:
-            log.debug(f"Got a dup in nodes!!! {feature['tags']}")
{feature['tags']}") - version = int(result[0][2]) + 1 - coords = shapely.from_wkt(result[0][3][10:]) - lat = coords.y - lon = coords.x - attrs = {'id': int(result[0][0]), 'version': version, 'lat': lat, 'lon': lon} - tags = result[0][1] - tags[f'old_{key}'] = value - tags['fixme'] = "Probably a duplicate!" - return {'attrs': attrs, 'tags': tags} - return dict() - - def conflateById(self, - feature: dict, - dbindex: int, - ): + (list): The conflated output """ - Conflate a feature with existing ways using the OSM ID + timer = Timer(text="conflateFeatures() took {seconds:.0f}s") + timer.start() - Args: - feature (dict): The feature to conflate - dbindex (int): An index into the array of postgres connections + # ODK data is always a single node when mapping buildings, but the + # OSM data will be a mix of nodes and ways. For the OSM data, the + # building centroid is used. - Returns: - (dict): The modified feature - """ - log.debug(f"conflateById({feature})") - id = int(feature['attrs']['id']) - if id > 0: - if self.source[:3] != "PG:": - sql = f"SELECT osm_id,tags,version,ST_AsText(geom) FROM ways_view WHERE tags->>'id'='{id}'" - # log.debug(sql) - self.postgres[0].dbcursor.execute(sql) - result = self.postgres[0].dbcursor.fetchone() - if result: - version = int(result[0][2]) + 1 - attrs = {'id': int(result[0][0]), 'version': version} - tags = result[0][1] - # tags[f'old_{key}'] = value - tags['fixme'] = "Probably a duplicate!" - geom = mapping(shapely.from_wkt(result[0][3])) - return {'attrs': attrs, 'tags': tags} + # Most smartphone GPS are 5-10m off most of the time, plus sometimes + # we're standing in front of an amenity and recording that location + # instead of in the building. + # gps_accuracy = 10 + # this is the treshold for fuzzy string matching + match_threshold = 80 + data = list() + # New features not in OSM always use negative IDs + odkid = -100 + osmid = 0 + nodes = dict() + version = 0 + for entry in odkdata: + confidence = 0 + maybe = list() + odktags = dict() + osmtags = dict() + feature = dict() + newtags = dict() + geom = None + if 'attrs' in entry: + # The source came from an OSM XML file + geom = Point((float(entry["attrs"]["lon"]), float(entry["attrs"]["lat"]))) + odktags = entry['tags'] + elif 'coordinates' in entry: + # The source came from a GeoJson file + gps = entry['coordinates'] + geom = Point(float(gps[0]), float(gps[1])) + odktags = entry['properties'] + wkt = shape(geom) + for existing in osmdata: + # We could probably do this using GeoPandas or gdal, but that's + # going to do the same brute force thing anyway. + if 'geometry' in existing: + geom = existing['geometry'] + osmwkt = shape(geom) + if osmwkt.geom_type != 'Point': + center = shapely.centroid(osmwkt) else: - sql = f"SELECT osm_id,tags,version,ST_AsText(geom) FROM ways_view WHERE tags->>'id'='{id}'" - # log.debug(sql) - self.postgres[dbindex].dbcursor.execute(sql) - result = self.postgres[dbindex].dbcursor.fetchone() - if result: - version = int(result[0][2]) + 1 - attrs = {'id': int(result[0][0]), 'version': version} - tags = result[0][1] - # tags[f'old_{key}'] = value - tags['fixme'] = "Probably a duplicate!" 
-                    geom = mapping(shapely.from_wkt(result[0][3]))
-                    return {'attrs': attrs, 'tags': tags, 'refs': refs}
-        else:
-            for key, value in self.data.items():
-                if key == id:
-                    return value
-        return dict()
+                    center = shape(osmwkt)
+                # dist = shapely.hausdorff_distance(center, wkt)
+                dist = wkt.distance(center)
+                if dist < threshold:
+                    # cache all OSM features within our threshold distance
+                    # These are needed by ODK, but duplicates of other fields,
+                    # so they aren't needed and just add more clutter.
+                    maybe.append({"dist": dist, "odk": entry, "osm": existing})
+
+            # Compare tags for everything that got cached
+            hits = 0
+            if len(maybe) > 0:
+                # cache the refs to use in the OSM XML output file
+                refs = list()
+                odk = dict()
+                osm = dict()
+                # After sorting, the first entry is the closest feature
+                maybe.sort(key=distSort)
+                # make consistent data structures from different input formats
+                if 'properties' in maybe[0]["odk"]:
+                    odk['tags'] = maybe[0]["odk"]['properties']
+                    # GeoJson coordinates are in lon, lat order
+                    gps = maybe[0]["odk"]['coordinates']
+                    odk['attrs'] = {'id': odkid, 'lat': gps[1], 'lon': gps[0]}
+                    # These are added by ODK Collect, and not relevant for OSM
+                    if 'title' in odk['tags']:
+                        del odk['tags']['title']
+                    if 'label' in odk['tags']:
+                        del odk['tags']['label']
+                elif 'attrs' in maybe[0]["odk"]:
+                    odk['tags'] = maybe[0]["odk"]['tags']
+                    odk['attrs'] = maybe[0]["odk"]['attrs']
+
+                if 'properties' in maybe[0]["osm"]:
+                    osm['tags'] = maybe[0]["osm"]['properties']
+                    if 'title' in osm['tags']:
+                        del osm['tags']['title']
+                    if 'label' in osm['tags']:
+                        del osm['tags']['label']
+                    gps = maybe[0]['osm']['geometry']['coordinates']
+                    osm['attrs'] = {'id': osm['tags']['id'], 'lat': gps[1], 'lon': gps[0]}
+                elif 'attrs' in maybe[0]["osm"]:
+                    osm['tags'] = maybe[0]["osm"]['tags']
+                    osm['attrs'] = maybe[0]["osm"]['attrs']
+                    version = int(osm['attrs']['version']) + 1
+                    if 'refs' in maybe[0]['osm']:
+                        refs = eval(maybe[0]['osm']['refs'])
+                    nodes[osm['attrs']['id']] = osm
+
+                for key, value in odk['tags'].items():
+                    if key[:4] == "name":
+                        # log.debug(f"Comparing: {value} == {osm['tags'].get(key)}")
+                        if key in osm['tags']:
+                            result = fuzz.ratio(value, osm['tags'][key])
+                            if result > match_threshold:
+                                log.debug(f"Matched: {result}: {key} = {value}")
+                                log.debug(f"Got a dup in file!!! {odktags}")
+                                hits += 1
+                                confidence = result
+                                # FIXME: if 100%, perfect match, less than
+                                # that probably contains a spelling mistake.
+                    else:
+                        if odk['tags'] == osm['tags']:
+                            # this would be an exact match in tags between odk and osm.
+                            # unlikely though.
+                            hits += 1
+                        else:
+                            # diff = DeepDiff(osm['tags'], odk['tags'])
+                            # see if the ODK key exists in the OSM tags
+                            if key in osm['tags']:
+                                hits += 1
+
+            if hits > 0:
+                # log.debug(f"HITS: {hits}")
+                # If there have been hits, it's probably a duplicate
+                attrs = {"id": osm['attrs']["id"], "version": version, 'lat': osm['attrs']['lat'], 'lon': osm['attrs']['lon']}
+                # Merge the two sets of tags, the ODK values win on conflict
+                newtags = osm['tags'] | odktags
+                # These are added by ODK Collect, and not relevant for OSM
+                # del newtags['id']
+                if "refs" in newtags:
+                    del newtags['refs']
+                newtags['fixme'] = "Probably a duplicate!"
+                newtags['confidence'] = hits
+                if len(refs) == 0:
+                    feature = {"attrs": attrs, "version": version, "tags": newtags}
+                else:
+                    feature = {"attrs": attrs, "version": version, "refs": refs, "tags": newtags}
+
+            if hits == 0:
+                # If no hits, it's new data. ODK data is always just a POI for now
+                feature["attrs"] = {"id": odkid, "lat": entry["attrs"]["lat"], "lon": entry["attrs"]["lon"], "version": version, "timestamp": entry["attrs"]["timestamp"]}
+                feature["tags"] = odktags
+                # print(f"{odkid}: {odktags}")
+                odkid -= 1
+            data.append(feature)
+
+        timer.stop()
+        return data
 
     def cleanFeature(self,
                      feature: dict,
@@ -405,27 +420,41 @@ def dump(self):
         # print(f"{k}(v{self.versions[k]}) = {v}")
 
     def conflateData(self,
-                     odkdata: list,
-                     ):
+                     source: str,
+                     ) -> dict:
         """
         Conflate all the data. This the primary interfacte for conflation.
 
         Args:
-            odkdata (list): A list of all the entries in the OSM XML input file
+            source (str): The source file to conflate
 
         Returns:
-            (dict): The modified features
+            (dict): The conflated features
         """
         timer = Timer(text="conflateData() took {seconds:.0f}s")
         timer.start()
+
+        log.info("Opening data file: %s" % source)
+        toplevel = Path(source)
+        if toplevel.suffix == ".geojson":
+            src = open(source, "r")
+            self.data = geojson.load(src)
+        elif toplevel.suffix == ".osm":
+            src = open(source, "r")
+            osmin = OsmFile()
+            self.data = osmin.loadFile(source) # input file
+            if self.boundary:
+                gs = GeoSupport(source)
+                # self.data = gs.clipFile(self.data)
+
         # Use fuzzy string matching to handle minor issues in the name column,
         # which is often used to match an amenity.
         if len(self.data) == 0:
-            self.postgres[0].dbcursor.execute("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch")
+            self.postgres[0].query("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch")
 
-        log.debug(f"OdkMerge::conflateData() called! {len(odkdata)} features")
+        # log.debug(f"OdkMerge::conflateData() called! {len(odkdata)} features")
 
         # A chunk is a group of threads
-        chunk = round(len(odkdata) / cores)
+        chunk = round(len(self.data) / cores)
 
         # cycle = range(0, len(odkdata), chunk)
 
@@ -434,11 +463,15 @@ def conflateData(self,
         future = None
         result = None
         index = 0
+        if True: # DEBUGGING HACK ALERT!
+            result = conflateThread(self.data, self, index)
+            return dict()
+
         with concurrent.futures.ThreadPoolExecutor(max_workers=cores) as executor:
             i = 0
             subset = dict()
             futures = list()
-            for key, value in odkdata.items():
+            for key, value in self.data.items():
                 subset[key] = value
                 if i == chunk:
                     i = 0
@@ -457,16 +490,27 @@ def conflateData(self,
         return newdata
         # return alldata
 
-    def outputOSM(self,
-                  data: FeatureCollection,
-                  ):
-        """
-        Output in OSM XML format
+    def writeOSM(self,
+                 data: dict,
+                 filespec: str,
+                 ):
+        """
+        Write the conflated output to an OSM XML file.
 
-        Args:
-            data (FeatureCollection): The data to convert
-        """
-        pass
+        Args:
+            data (dict): The conflated output
+            filespec (str): The file to write
+        """
+        osm = OsmFile(filespec)
+        for entry in data:
+            out = str()
+            # Entries that reference existing nodes are ways, the rest are POIs
+            if 'refs' in entry and len(entry['refs']) > 0:
+                out = osm.createWay(entry, True)
+            else:
+                out = osm.createNode(entry, True)
+            if len(out) > 0:
+                osm.write(out)
 
+    def writeGeoJson(self,
+                 data: dict,
+                 filespec: str,
+                 ):
+        """
+        Write the conflated output to a GeoJson file. A minimal sketch:
+        each entry becomes a Point, with the tags as the properties.
+        """
+        features = list()
+        for entry in data:
+            if 'lat' in entry['attrs']:
+                geom = Point((float(entry['attrs']['lon']), float(entry['attrs']['lat'])))
+                features.append(Feature(geometry=geom, properties=entry['tags']))
+        dump(FeatureCollection(features), open(filespec, 'w'))
 
 def conflateThread(features: dict,
                    source: str,
@@ -479,8 +523,8 @@ def conflateThread(features: dict,
         feature (dict): The feature to conflate
         source (str): The data source for conflation, file or database
         dbindex (int): An index into the array of postgres connections
 
     Returns:
+        (list): the conflated data output
     """
     timer = Timer(text="conflateThread() took {seconds:.0f}s")
     timer.start()
@@ -504,12 +548,9 @@ def conflateThread(features: dict,
             # using geopoint in the XLSForm.
             result = source.conflateById(value, dbindex)
         elif id < 0:
-            if source.source[:3] != "PG:":
-                result = source.conflateFile(value)
-            else:
-                result = source.conflateNode(value, dbindex)
-                if len(result) == 0:
-                    result = source.conflateWay(value, dbindex)
+            result = source.conflateNode(value, dbindex)
+            if len(result) == 0:
+                result = source.conflateWay(value, dbindex)
         if result and len(result) > 0:
             # Merge the tags and attributes together, the OSM data and ODK data.
             # If no match is found, the ODK data is used to create a new feature.
@@ -534,25 +575,31 @@ def main():
     parser = argparse.ArgumentParser(
         prog="conflator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
-        description="This program conflates external data with existing features from OSM.",
+        description="This program conflates external data with existing features in OSM.",
        epilog="""
-    This program conflates external datasets with OSM data using a postgresql database.
+    This program conflates external datasets with OSM data. It can use a postgres
+database, or GeoJson and OSM XML files as the input sources.
+
+    Examples:
+        To conflate two files:
+            ./conflator.py -v -s camping-2024_06_14.osm -e extract.geojson
+
+        To conflate a file using postgres:
+            ./conflator.py -v -s camping-2024_06_14.geojson -u localhost/usa -b utah.geojson
+        """,
     )
     parser.add_argument("-v", "--verbose", action="store_true", help="verbose output")
+    parser.add_argument("-u", "--uri", help="OSM Database URI")
+    parser.add_argument("-e", "--extract", help="The OSM data extract")
+    parser.add_argument("-s", "--source", required=True, help="The ODK data to conflate")
+    parser.add_argument("-t", "--threshold", default=1, help="Threshold for distance calculations")
     parser.add_argument("-o", "--outfile", help="Output file from the conflation")
-    parser.add_argument("-i", "--infile", help="GeoJson or OSM XML file to conflate")
-    parser.add_argument("-b", "--boundary", help="Boundary polygon to limit the data size")
+    parser.add_argument("-b", "--boundary", help="Optional boundary polygon to limit the data size")
 
-    args, unknown = parser.parse_known_args()
-    osmdata = None
+    args = parser.parse_args()
+    indata = None
     source = None
-    if len(unknown) < 2:
-        parser.print_help()
-        quit()
-    else:
-        osmdata = unknown[0]
-        source = unknown[1]
 
     # if verbose, dump to the terminal.
     if args.verbose:
@@ -565,50 +612,30 @@ def main():
         ch.setFormatter(formatter)
         log.addHandler(ch)
 
+    if not args.extract and not args.uri:
+        parser.print_help()
+        log.error("You must supply a database URI or a data extract file!")
+        quit()
+
+    toplevel = Path(args.source)
     if args.outfile:
         outfile = args.outfile
     else:
-        outfile = os.path.basename(osmdata.replace('.osm', '-foo.osm'))
+        outfile = f"{toplevel.stem}-out.osm"
 
-    # This is the existing OSM data, a database or a file
-    if args.boundary:
-        extract = OdkMerge(source, args.boundary)
-    else:
-        extract = OdkMerge(source)
+    conflate = Conflator(args.uri)
 
-    if extract:
-        odkf = OsmFile(outfile) # output file
-        osm = odkf.loadFile(osmdata) # input file
-        #odkf.dump()
-    else:
-        log.error("No ODK data source specified!")
-        parser.print_help()
-        quit()
+    if args.extract is not None and len(args.extract) > 0:
+        data = conflate.conflateFiles(args.source, args.extract, int(args.threshold))
+    else:
+        # FIXME: conflating directly against the database isn't wired up here yet
+        log.error("Database conflation is not implemented here yet!")
+        quit()
 
-    # This returns a list of lists of dictionaries. Each thread returns
-    # a list of the features, and len(data) is thre number of CPU cores.
-    data = extract.conflateData(osm)
-    out = list()
-    #print(data)
-    for entry in data:
-        # if 'refs' in feature or 'building' in feature['tags']:
-        for feature in entry:
-            if 'refs' in feature:
-                feature['refs'] = list()
-                out.append(odkf.createWay(feature, True))
-            else:
-                out.append(odkf.createNode(feature, True))
-
-    # out = ""
-    # for id, feature in osm.items():
-    #     result = extract.conflateFile(feature)
-    #     if len(result) > 0:
-    #         node = odkf.featureToNode(result)
-    #     else:
-    #         node = feature
-    #     out += odkf.createNode(node, True)
-    odkf.write(out)
-    log.info(f"Wrote {outfile}")
+    jsonout = f"{toplevel.stem}-out.geojson"
+    osmout = outfile
+
+    conflate.writeOSM(data, osmout)
+    conflate.writeGeoJson(data, jsonout)
+
+    log.info(f"Wrote {osmout}")
+    log.info(f"Wrote {jsonout}")
 
 if __name__ == "__main__":
     """This is just a hook so this file can be run standlone during development."""
diff --git a/docs/odkconflation.md b/docs/odkconflation.md
new file mode 100644
index 0000000..96df751
--- /dev/null
+++ b/docs/odkconflation.md
@@ -0,0 +1,236 @@
+# Conflating OpenDataKit with OpenStreetMap
+
+Typically conflation is done when doing data imports, but not always.
+Data collected in the field can be considered an import. Conflating
+buildings or POIs from external data is relatively easy, as it has
+already been cleaned up and validated. When you are doing field
+mapping, you have to clean up and validate the data during
+conflation. This is a time consuming process, even with good
+conflation software.
+
+I've worked with multiple conflation programs over the years:
+[Hootenanny](https://github.com/ngageoint/hootenanny),
+[OpenJump](http://www.openjump.org/) (later forked into RoadMatcher),
+etc., which are now dead projects. Conflation is a hard technical
+challenge, and often the results are poor and unsatisfying. For
+smaller datasets it's often easier to do manual conflation using
+[JOSM](https://josm.openstreetmap.de/) or
+[Qgis](https://qgis.org/en/site/). This project tries to simplify the
+problem by focusing on OpenStreetMap data.
+
+## Smartphone Data Collection
+
+While commercial organizations may use expensive GPS devices, most of
+us who collect data as volunteers or for an NGO use our smartphones.
+There is a variety of smartphone apps for data collection that fall
+into two categories. The first category are apps like
+[Vespucci](http://vespucci.io/),
+[StreetComplete](https://streetcomplete.app/), and [Organic
+Maps](https://organicmaps.app/), which upload directly to
+[OpenStreetMap](https://www.openstreetmap.org). These are great for
+the casual mapper who only adds data occasionally, and is limited to a
+POI. For example, a casual mapper may want to add the restaurant they
+are currently eating in when they notice it's not in
+OpenStreetMap. In addition, they probably have a cell phone
+connection, so the data gets added right away.
+
+The other category are apps like [ODK Collect](https://getodk.org/),
+[QField](https://qfield.org/), and [ArcGIS Field
+Maps](https://www.arcgis.com/apps/fieldmaps/), which are oriented to
+larger scale mapping projects, often offline without any cellular
+connection. These collect a lot of data that then needs to get
+processed later. And conflation is part of this process.
+
+All of these smartphone based data collection apps suffer from poor
+GPS location accuracy. Modern smartphones (2024) are often 5-9 meters
+off the actual location, sometimes worse. In addition, when collecting
+field data you can't always record the actual location you want; you
+can only record where you are standing.
+
+You can improve the location data somewhat if you have a good quality
+basemap, for example when you see a building within a courtyard wall
+while you are standing in the street. If you have a basemap, typically
+satellite imagery, you can touch the location on the basemap and use
+that instead of where you are standing. Then later, conflation has a
+much higher chance of being less painful.
+
+## OpenDataKit
+
+[OpenDataKit](https://opendatakit.org/software/) is a
+format for data import forms used to collect custom data. The source
+file is a spreadsheet, called an
+[XLSForm](https://xlsform.org/en/). This gets used by the mobile app
+for the question and answer process defined by the XLSForm. There are
+multiple apps and projects using XLSForms, so it's well supported and
+maintained.
+
+The XLS source file syntax is a bit weird at first, being a
+spreadsheet, so the osm-fieldwork project contains tested XLSForm
+templates for a variety of mapping project goals. These can be used to
+create efficient XForms that are easy to convert to OSM. The primary
+task when manually converting ODK collected data into OSM format is
+converting the tags. If the XLSForm is created with a focus towards
+OSM, this becomes a much simpler process, which [is
+detailed](https://www.senecass.com/projects/Mapping/tech/ImproveXLSForms.html)
+more in this document. Simply stated, what is in the *name* column in
+the XLSForm becomes the *name* of the tag in OSM, and the response
+from the choices sheet becomes the value.
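+As a sketch of that convention, a submission keyed by the XLSForm
+*name* columns needs no translation table at all when the form
+follows OSM tagging. The values here are hypothetical:
+
+```python
+# A hypothetical submission from ODK Collect, keyed by the XLSForm
+# 'name' columns, with values taken from the choices sheet.
+submission = {"amenity": "restaurant", "cuisine": "pizza"}
+
+# Because the form uses OSM tagging, every entry is already an OSM tag.
+tags = dict(submission)
+print(tags)  # {'amenity': 'restaurant', 'cuisine': 'pizza'}
+```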
+### ODK Collect & Central
+
+[ODK Collect](https://getodk.org/) is a mobile app for data collection
+using XLSForms. Its server side is [ODK
+Central](https://docs.getodk.org/central-intro/), which replaces the
+older [ODK Aggregate](https://docs.getodk.org/aggregate-intro/). ODK
+Central manages the XLSForms downloaded to your phone, as well as the
+submissions uploaded from your phone when back online.
+
+A related project for processing ODK data and working remotely with
+Central is [osm-fieldwork](https://hotosm.github.io/osm-fieldwork/).
+This Python project handles conversion of the various data files from
+Collect or Central into OSM XML and GeoJson for future processing via
+editing or conflation. This is heavily used in the FMTM backend.
+
+## Field Data Collection
+
+Collecting data in the field is the best way to add data to
+OpenStreetMap. Whether done by casual mappers adding POIs or by more
+dedicated mappers, what is reality at that moment is the key to
+keeping OSM fresh and updated. When it comes to improving the metadata
+for buildings, many have been imported with **building=yes** from remote
+mapping using the [HOT Tasking Manager](https://tasks.hotosm.org/) to
+trace buildings from satellite imagery.
+
+But ground-truthing what kind of building it is improves the map. It
+may be a medical clinic, restaurant, residence, etc. Who knows, until
+somebody stands in front of the building to collect more information
+about it? This may be identifying it as a clinic or residence, adding
+the building material, what the roof is made of, whether its power is
+non-existent, or whether there are solar panels or a generator. Some
+humanitarian mapping collects data on public toilets and community
+water sources for future improvements.
+
+Knowing there is a building on the map is useful, but better yet is
+knowing what the building is used for. What is it made of? Does it
+have AC or DC power? Is water available? All of these details improve
+the map to make it more useful to others.
+
+### Field Mapping Tasking Manager
+
+The [Field Mapping Tasking Manager](fmtm.hotosm.org) (FMTM) is a
+project to organize large scale data collection using ODK Collect and
+ODK Central. It uses the osm-fieldwork project for much of the backend
+processing of the ODK data, but is designed for large scale field
+mapping involving many people. It uses ODK Collect and ODK Central as
+the primary tools. One of the final steps in processing ODK data to
+import into OSM is conflating it with existing data. This can be done
+manually of course, but with a large number of data submissions this
+becomes tedious and time consuming. FMTM aggregates all the data for an
+entire project, and may have thousands of submissions. This is where
+conflation is critical.
+
+# The Algorithm
+
+Currently conflation is focused on ODK with OSM. This uses the
+conflator.py program, which can conflate between the ODK data and an
+OSM data extract. There are other conflation programs in this project
+for other external datasets, but they use a postgres database instead
+of two files.
+
+## The Conflator() Class
+
+This is the primary interface for conflating files. It has two primary
+endpoints. The top level endpoint is **Conflator.conflateFiles()**,
+which is used when the conflator program is run standalone. It opens
+the two disk files, parses the various formats, and generates a data
+structure used for conflation. This class uses the **Parsers()** class
+from osm-fieldwork, which can parse the JSON or CSV files downloaded
+from ODK Central, or the ODK XML "instance" files when working
+offline. OSM XML or GeoJson files are also supported. Each entry in
+the files is turned into a list of Python dicts to make it easier to
+compare the data.
+
+Once the two files are read, the **Conflator.conflateFeatures()**
+endpoint takes the two lists of data and does the actual
+conflation. There is an additional parameter passed to this endpoint,
+which is the threshold distance. This is used to find all features in
+the OSM data extract within that distance. Note that this distance is
+in decimal degrees, the units of the coordinates, not meters, so
+distance calculations are a bit fuzzy.
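+Driving those two endpoints from Python looks like this. A minimal
+sketch, assuming a local postgres URI and hypothetical file names:
+
+```python
+from conflator.conflator import Conflator
+
+# The database URI and the file names here are hypothetical.
+conflate = Conflator("localhost/usa")
+data = conflate.conflateFiles("submissions.csv", "extract.geojson")
+
+# Write both output formats, as the standalone program does.
+conflate.writeOSM(data, "submissions-out.osm")
+conflate.writeGeoJson(data, "submissions-out.geojson")
+```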
+This is a brute force conflation algorithm: not fast, but it tries to
+be complete. It consists of two loops. The top level loops through the
+ODK data. For each ODK data entry, it finds all the OSM features
+within the threshold distance. The inner loop then takes the closest
+feature and compares the tags. This is where things get
+interesting. If there is a *name* tag in the ODK data, it is string
+compared with the name of the closest OSM feature. Fuzzy string
+matching is used to handle minor spelling differences. Sometimes the
+misspelling is in the OSM data, but often when entering names of
+features on your smartphone, mistyping occurs. If there is a 100%
+match in the name tags, then chances are the feature already exists in
+OSM.
+
+If there is no *name* tag in the ODK data, then the other tags are
+compared to try to find a possible duplicate feature. For example, a
+public toilet at a trailhead has no name, but if both ODK and OSM have
+**amenity=toilet**, then it's very likely a duplicate. If no tags
+match, then the ODK data is probably a new feature.
+
+Any time a possible duplicate is found, it is not automatically
+merged. Instead, a **fixme** tag is added to the feature in the output
+file with a statement that it is potentially a duplicate. When the
+output file is loaded into JOSM, you can search for this tag to
+manually decide if it is a duplicate.
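+The core of that comparison can be sketched with the same shapely and
+thefuzz libraries the program uses. The features, coordinates, and the
+distance cutoff here are hypothetical; 80% is the same fuzzy match
+threshold the code uses:
+
+```python
+from shapely.geometry import shape
+from thefuzz import fuzz
+
+odk = {"geometry": {"type": "Point", "coordinates": [-105.99, 38.53]},
+       "properties": {"name": "Kilroy's Kafe"}}
+osm = {"geometry": {"type": "Point", "coordinates": [-105.99, 38.53]},
+       "properties": {"name": "Kilroys Cafe"}}
+
+# Distance in decimal degrees between the ODK POI and the OSM centroid.
+dist = shape(odk["geometry"]).distance(shape(osm["geometry"]).centroid)
+
+# Fuzzy-compare the names; a minor misspelling still scores above 80.
+ratio = fuzz.ratio(odk["properties"]["name"], osm["properties"]["name"])
+if dist < 0.001 and ratio > 80:
+    print(f"Probably a duplicate! ({ratio}% name match)")
+```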
+## XLSForm Design
+
+Part of the key to improving conflation is a carefully created
+XLSForm. There is much more detailed information on
+[XLSForm
+design](https://hotosm.github.io/osm-fieldwork/about/xlsforms/), but
+briefly: whatever is in the *name* column in the *survey* sheet becomes
+the name of the tag, and whatever is in the *name* column in the
+*choices* sheet becomes the value. If you want a relatively smooth
+conflation, make sure your XLSForm uses OSM tagging schemas.
+
+If you don't follow OSM tagging, then conflation will assume all your
+ODK data is a new feature, and you'll have to manually conflate the
+results using JOSM. That's OK for small datasets, but quickly becomes
+very tedious for the larger datasets that FMTM collects.
+
+## The Output File
+
+The output file must be in OSM XML to enable updating the ways. If the
+OSM data is a POI, viewing it in JOSM is easy. If the OSM data is a
+polygon, the polygons won't appear at first when loaded into
+JOSM. Since the OSM way created by conflation has preserved the *refs*
+used by OSM XML to reference the nodes, doing *update modified* in
+JOSM then pulls down the nodes, and all the polygons will appear.
+
+## Conflicts
+
+There are some interesting issues to fix post conflation. ODK data is
+usually a single POI, whereas in OSM it may be a polygon. Sometimes,
+though, the POI is already in OSM. Remote mapping or building footprint
+imports often have only a polygon with a single **building=yes** tag,
+while the POI we collected in ODK has more data, for example that this
+building is a restaurant serving pizza and is made of brick.
+
+In OSM there is sometimes a POI for an amenity as well as a building
+polygon, added at different times by different people. The key detail
+for conflation is whether any of the tags and values from the new data
+match the existing data.
+
+FMTM downloads a data extract from OSM using
+[osm-rawdata](https://hotosm.github.io/osm-rawdata/), and then
+filters the data extract based on what is on the choices
+sheet of the XLSForm. Otherwise Collect won't launch. Because this
+data extract does not contain all the tags that are in OSM, it creates
+conflicts. This problem is FMTM specific, and can be improved by
+making a more complete data extract from OSM.
+
+When the only tag in the OSM data is **building=**, any tags from ODK
+are merged with the building polygon when possible. If the OSM feature
+has other tags, JOSM will flag this as a conflict. Then you have to
+manually merge the tags in JOSM.
diff --git a/mkdocs.yml b/mkdocs.yml
index 8b2e15a..b2db0d4 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -65,7 +65,7 @@ plugins:
 nav:
   - Home: index.md
   - About: about.md
-  - Conflation Guide: conflation.md
+  - Conflation Guide: odkconflation.md
   - Utilities:
       - Conflator: conflator.md
      - Utilities: utilities.md