Skip to content

Commit

Permalink
feat(libpostal): Use libpostal service
Browse files Browse the repository at this point in the history
BREAKING CHANGE

Use microservice-wrapper to avoid having to load libpostal locally.

Note: this now requires a new configuration section in `pelias.json`, a
top-level `services` key with the usual properties. Here's an example
full `pelias.json`:

```
{
  "api": {
    "textAnalyzer": "libpostal"
  },
  "services": {
    "libpostal": {
      "url": "http://libpostal-service-url:8080",
      "timeout": 4000
    }
  }
}
```

Fixes #106
  • Loading branch information
orangejulius committed Oct 4, 2019
1 parent 5701d44 commit 4ff9371
Show file tree
Hide file tree
Showing 11 changed files with 262 additions and 187 deletions.
2 changes: 1 addition & 1 deletion .jshintrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"node": true,
"curly": true,
"eqeqeq": true,
"esversion": 6,
"esversion": 8,
"freeze": true,
"immed": true,
"indent": 2,
Expand Down
199 changes: 101 additions & 98 deletions api/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,120 +31,123 @@ function setup( addressDbPath, streetDbPath ){
if( 'string' !== typeof number ){ return cb( 'invalid number' ); }
if( 'string' !== typeof street ){ return cb( 'invalid street' ); }

var normalized = {
number: analyze.housenumber( number ),
street: analyze.street( street )
};
analyze.street(street, function streetAnalyzeCallback(err, street, metadata) {

// error checking
if( isNaN( point.lat ) ){ return cb( 'invalid latitude' ); }
if( isNaN( point.lon ) ){ return cb( 'invalid longitude' ); }
if( isNaN( normalized.number ) ){ return cb( 'invalid number' ); }
if( !normalized.street.length ){ return cb( 'invalid street' ); }
var normalized = {
number: analyze.housenumber( number ),
street: street
};

// perform a db lookup for the specified street
// @todo: performance: only query for part of the table
query.search( db, point, normalized.number, normalized.street, function( err, res ){
// error checking
if( isNaN( point.lat ) ){ return cb( 'invalid latitude' ); }
if( isNaN( point.lon ) ){ return cb( 'invalid longitude' ); }
if( isNaN( normalized.number ) ){ return cb( 'invalid number' ); }
if( !normalized.street.length ){ return cb( 'invalid street' ); }

// @note: results can be from multiple different street ids.
// perform a db lookup for the specified street
// @todo: performance: only query for part of the table
query.search( db, point, normalized.number, normalized.street, function( err, res ){

// an error occurred or no results were found
if( err || !res || !res.length ){ return cb( err, null ); }
// @note: results can be from multiple different street ids.

// try to find an exact match
var match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return row.housenumber === normalized.number;
});
// an error occurred or no results were found
if( err || !res || !res.length ){ return cb( err, null ); }

// return exact match
if( match ){
return cb( null, {
type: 'exact',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
// try to find an exact match
var match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return row.housenumber === normalized.number;
});
}

// try to find a close match with the same number (possibly an apartment)
match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return Math.floor( row.housenumber ) === Math.floor( normalized.number );
});
// return exact match
if( match ){
return cb( null, {
type: 'exact',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
});
}

// return close match
if( match ){
return cb( null, {
type: 'close',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
// try to find a close match with the same number (possibly an apartment)
match = res.find( function( row ){
if( row.source === 'VERTEX' ){ return false; }
return Math.floor( row.housenumber ) === Math.floor( normalized.number );
});
}

// attempt to interpolate the position

// find the records before and after the desired number (group by street segment)
var map = {};
res.forEach( function( row ){
if( !map.hasOwnProperty( row.id ) ){ map[row.id] = {}; }
if( row.housenumber < normalized.number ){ map[row.id].before = row; }
if( row.housenumber > normalized.number ){ map[row.id].after = row; }
if( map[row.id].before && map[row.id].after ){
map[row.id].diff = {
before: map[row.id].before.housenumber - normalized.number,
after: map[row.id].after.housenumber - normalized.number
};

// return close match
if( match ){
return cb( null, {
type: 'close',
source: match.source,
source_id: match.source_id,
number: analyze.housenumberFloatToString( match.housenumber ),
lat: parseFloat( match.lat.toFixed(7) ),
lon: parseFloat( match.lon.toFixed(7) )
});
}
});

// remove segments with less than 2 points; convert map to array
var segments = [];
for( var id in map ){
if( map[id].before && map[id].after ){
segments.push( map[id] );
// attempt to interpolate the position

// find the records before and after the desired number (group by street segment)
var map = {};
res.forEach( function( row ){
if( !map.hasOwnProperty( row.id ) ){ map[row.id] = {}; }
if( row.housenumber < normalized.number ){ map[row.id].before = row; }
if( row.housenumber > normalized.number ){ map[row.id].after = row; }
if( map[row.id].before && map[row.id].after ){
map[row.id].diff = {
before: map[row.id].before.housenumber - normalized.number,
after: map[row.id].after.housenumber - normalized.number
};
}
});

// remove segments with less than 2 points; convert map to array
var segments = [];
for( var id in map ){
if( map[id].before && map[id].after ){
segments.push( map[id] );
}
}
}

// could not find two rows to use for interpolation
if( !segments.length ){
return cb( null, null );
}
// could not find two rows to use for interpolation
if( !segments.length ){
return cb( null, null );
}

// sort by minimum housenumber difference from target housenumber ASC
segments.sort( function( a, b ){
return Math.abs( a.diff.before + a.diff.after ) - Math.abs( b.diff.before + b.diff.after );
});
// sort by minimum housenumber difference from target housenumber ASC
segments.sort( function( a, b ){
return Math.abs( a.diff.before + a.diff.after ) - Math.abs( b.diff.before + b.diff.after );
});

// select before/after values to use for the interpolation
var before = segments[0].before;
var after = segments[0].after;

// compute interpolated address
var A = { lat: project.toRad( before.proj_lat ), lon: project.toRad( before.proj_lon ) };
var B = { lat: project.toRad( after.proj_lat ), lon: project.toRad( after.proj_lon ) };
var distance = geodesic.distance( A, B );

// if distance = 0 then we can simply use either A or B (they are the same lat/lon)
// else we interpolate between the two positions
var point = A;
if( distance > 0 ){
var ratio = ((normalized.number - before.housenumber) / (after.housenumber - before.housenumber));
point = geodesic.interpolate( distance, ratio, A, B );
}

// return interpolated address
return cb( null, {
type: 'interpolated',
source: 'mixed',
number: '' + Math.floor( normalized.number ),
lat: parseFloat( project.toDeg( point.lat ).toFixed(7) ),
lon: parseFloat( project.toDeg( point.lon ).toFixed(7) )
// select before/after values to use for the interpolation
var before = segments[0].before;
var after = segments[0].after;

// compute interpolated address
var A = { lat: project.toRad( before.proj_lat ), lon: project.toRad( before.proj_lon ) };
var B = { lat: project.toRad( after.proj_lat ), lon: project.toRad( after.proj_lon ) };
var distance = geodesic.distance( A, B );

// if distance = 0 then we can simply use either A or B (they are the same lat/lon)
// else we interpolate between the two positions
var point = A;
if( distance > 0 ){
var ratio = ((normalized.number - before.housenumber) / (after.housenumber - before.housenumber));
point = geodesic.interpolate( distance, ratio, A, B );
}

// return interpolated address
return cb( null, {
type: 'interpolated',
source: 'mixed',
number: '' + Math.floor( normalized.number ),
lat: parseFloat( project.toDeg( point.lat ).toFixed(7) ),
lon: parseFloat( project.toDeg( point.lon ).toFixed(7) )
});
});
});
};
Expand Down
2 changes: 1 addition & 1 deletion cmd/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ app.use('/demo', express.static('demo'));
app.listen( PORT, function() {

// force loading of libpostal
analyze.street( 'test street' );
//analyze.street( 'test street', function() {} );

console.log( 'server listening on port', PORT );
});
50 changes: 17 additions & 33 deletions lib/analyze.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
const libpostal_service = require( './libpostal_wrapper' );
// constants for controlling how we parse ranges, eg: 'α-β'
// some ranges such as '1-7' are ambiguous; it could mean 'apt 7, no 1'; or
// it could mean 'apt 1, no 7'; or could even be a valid range 'one to seven'.
Expand All @@ -7,47 +8,30 @@ var MIN_RANGE = 1; // the miniumum amount β is higher than α
var MAX_RANGE = 6; // the maximum amount β is higher than α
var MIN_RANGE_HOUSENUMBER = 10; // the minimum acceptable value for both α and β

/*
* Return the appropriate version of node-postal
*/

var _nodepostal_module;
function get_libpostal() {
// lazy load this dependency; since it's large (~2GB RAM) and may be
// accidentally required by a process which doesn't use it.
if (!_nodepostal_module) {
// load the mock library if MOCK_LIBPOSTAL env var is set
if (process.env.MOCK_LIBPOSTAL) {
_nodepostal_module = require('../test/lib/mock_libpostal');
// otherwise load the real thing
} else {
_nodepostal_module = require('node-postal');
}
}

return _nodepostal_module;
}

/**
analyze input streetname string and pass a list of expansions to the callback.
**/
function street( streetName ){
const postal = get_libpostal();
function street( streetName, callback ){
const postal = libpostal_service();

// use libpostal to expand the address
var expansions = postal.expand.expand_address( streetName );
postal.expand.expand_address( streetName, function streetCallback(err, results, metadata) {
if (err) {
return callback(err);
}

// remove ordinals
expansions = expansions.map(function( item ){
return item.replace( /(([0-9]+)(st|nd|rd|th)($|\s))/gi, '$2 ' ).trim();
});
// remove ordinals
let expansions = results.map(function( item ){
return item.replace( /(([0-9]+)(st|nd|rd|th)($|\s))/gi, '$2 ' ).trim();
});

// remove duplicates
expansions = expansions.filter(function(item, pos, self) {
return self.indexOf(item) === pos;
});
// remove duplicates
expansions = expansions.filter(function(item, pos, self) {
return self.indexOf(item) === pos;
});

return expansions;
callback(null, expansions, metadata);
});
}

/**
Expand Down
22 changes: 22 additions & 0 deletions lib/libpostal_wrapper.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
const mock_libpostal = require('../test/lib/mock_libpostal');

// This module is a wrapper around the actual libpostal service library
// and the mock libpostal library
// it allows an environment variable to switch which library is used in application code

let libpostal_module;
function get_libpostal() {
// return the mock library if MOCK_LIBPOSTAL env var is set
if (process.env.MOCK_LIBPOSTAL) {
return mock_libpostal;
// otherwise return the actual service
} else {
// lazy load the libpostal module so that tests can skip configuring the service
if (!libpostal_module) {
libpostal_module = require( '../libpostal/service' );
}
return libpostal_module;
}
}

// export the selector function itself (not its result); the mock-vs-service
// choice is made each time the exported function is called
module.exports = get_libpostal;
50 changes: 50 additions & 0 deletions libpostal/service.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
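// deasync is used to provide a sync-looking interface
// to the async call to the libpostal service
// NOTE(review): nothing else in this file references `deasync` after the
// require below; confirm whether it is still needed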
const deasync = require('deasync');
const microservice_wrapper = require('pelias-microservice-wrapper');
const pelias_config = require('pelias-config').generate();

/**
 * Service configuration for the remote libpostal service.
 *
 * Extends the microservice wrapper's `ServiceConfiguration`, registering
 * itself under the name 'libpostal' and describing how to build the URL
 * and the parameters for a request.
 */
class LibpostalServiceConfig extends microservice_wrapper.ServiceConfiguration {
  /**
   * @param {object} configBlob - config entry handed through to the base class
   */
  constructor(configBlob) {
    super('libpostal', configBlob);
  }

  /**
   * Build the request URL by appending the endpoint name to the base URL.
   * NOTE(review): `baseUrl` is presumably set by the base class; confirm
   * how it treats a trailing slash.
   *
   * @param {object} params - request description; `params.endpoint` is used
   * @returns {string} the base URL followed by the endpoint
   */
  getUrl(params) {
    const { baseUrl } = this;
    return baseUrl + params.endpoint;
  }

  /**
   * Build the request parameters: only the address is sent.
   *
   * @param {object} params - request description; `params.address` is used
   * @returns {{address: *}} the parameters for the request
   */
  getParameters(params) {
    const { address } = params;
    return { address };
  }
}

// prefer the top-level 'services.libpostal' config entry; only when it is
// missing is the 'api.services.libpostal' location consulted
const top_level_entry = pelias_config.get('services.libpostal');
const config_entry = top_level_entry ? top_level_entry : pelias_config.get('api.services.libpostal');

// without a config entry the service cannot be set up, so fail at load time
if (!config_entry) {
  throw new Error('Libpostal configuration not found in `services.libpostal` or `api.services.libpostal`');
}

// build the callable libpostal service from its configuration
const service_configuration = new LibpostalServiceConfig(config_entry);
const libpostal_service = microservice_wrapper.service(service_configuration);

// create an object that looks like the interface to `node-postal` but uses a remote service
module.exports = {
expand: {
expand_address: function(param, callback) {
const params = {
endpoint: 'expand',
address: param
};

// the libpostal service will not handle an empty parameter
// so return empty array immediately
if (!param) {
return callback(null, []);
}
libpostal_service(params, callback);
}
}
};
3 changes: 0 additions & 3 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,6 @@ see: [source](https://github.com/pelias/interpolation/blob/master/cmd/server.js)
# docker

### build docker image
this can take some time for the first build due to installing libpostal from source
```bash
docker build -t pelias/interpolation .
```
Expand Down Expand Up @@ -434,8 +433,6 @@ To use Interpolation service with the Pelias API, [configure the pelias config f

### install dependencies

*note:* [libpostal](https://github.com/openvenues/node-postal#troubleshooting) **must** be installed on your system before you continue!

The `Dockerfile` in this repo has complete instructions on how to install everything from scratch on Ubuntu.

### TIGER dependency on GDAL
Expand Down
Loading

0 comments on commit 4ff9371

Please sign in to comment.