Skip to content

Commit

Permalink
fix(dedupe): improved deduplication between USA ZIP vs ZIP+4 properties
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Jul 11, 2024
1 parent 24d3306 commit d6d34d8
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .jshintrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"node": true,
"curly": true,
"eqeqeq": true,
"esversion": 9,
"esversion": "2022",
"freeze": true,
"immed": true,
"indent": 2,
Expand Down
31 changes: 26 additions & 5 deletions helper/diffPlaces.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,13 @@ function isLayerDifferent(item1, item2){
return false;
}

/**
 * return true if the first country code in the record's parent
 * hierarchy is 'USA'
 *
 * note: records which are nullish, or which lack `parent` or an array
 * valued `parent.country_a`, are reported as not being in the USA.
 */
function isUSA(item) {
  const countryCodes = item?.parent?.country_a;
  return _.isArray(countryCodes) && countryCodes[0] === 'USA';
}

/**
 * return true if the record is in the 'region' layer and its first
 * parent country code is 'USA' (ie. it looks like a US state)
 *
 * note: the country check is delegated to isUSA(), which tolerates
 * records lacking `parent` / `parent.country_a`; such records return
 * false here rather than throwing on the property access.
 */
function isUsState(item) {
  return isUSA(item) && item.layer === 'region';
}

// Geonames records in the locality and localadmin layer are parented by themselves
Expand Down Expand Up @@ -206,7 +210,7 @@ function isAddressDifferent(item1, item2){
// only compare zip if both records have it, otherwise just ignore and assume it's the same
// since by this time we've already compared parent hierarchies
if( _.has(address1, 'zip') && _.has(address2, 'zip') ){
if( isPropertyDifferent(address1, address2, 'zip') ){ return true; }
if( isZipDifferent(item1, item2) ){ return true; }
}

return false;
Expand Down Expand Up @@ -255,10 +259,27 @@ function isDifferent(item1, item2, requestLanguage){
return false;
}

/**
 * return true if the zip codes of two records are different
 *
 * The 'zip' property is read from each record's `address_parts` and
 * compared via isPropertyDifferent().
 *
 * note: when both records are in the USA only the first space-delimited
 * word of the normalized zip is compared, so that a ZIP+4 matches its
 * plain 5-digit ZIP (eg. 98036-6119 vs 98036). This relies on
 * normalizeString() (defined elsewhere) separating the +4 suffix from
 * the ZIP with a space.
 */
function isZipDifferent(item1, item2) {
  const parts1 = _.get(item1, 'address_parts');
  const parts2 = _.get(item2, 'address_parts');

  // unless both records are in the USA, compare using the default normalizer
  if (!isUSA(item1) || !isUSA(item2)) {
    return isPropertyDifferent(parts1, parts2, 'zip');
  }

  // USA: discard everything after the first word of the normalized value
  const leadingWord = (value) => {
    const [head] = normalizeString(value).split(' ');
    return head;
  };

  return isPropertyDifferent(parts1, parts2, 'zip', leadingWord);
}

/**
* return true if properties are different
*/
function isPropertyDifferent(item1, item2, prop ){
function isPropertyDifferent(item1, item2, prop, normalizer = normalizeString ){

// if neither item has prop, we consider them the same
if( !_.has(item1, prop) && !_.has(item2, prop) ){ return false; }
Expand All @@ -274,7 +295,7 @@ function isPropertyDifferent(item1, item2, prop ){
let prop1StringValue = field.getStringValue( prop1[i] );
for( let j=0; j<prop2.length; j++ ){
let prop2StringValue = field.getStringValue( prop2[j] );
if( normalizeString( prop1StringValue ) === normalizeString( prop2StringValue ) ){
if( normalizer( prop1StringValue ) === normalizer( prop2StringValue ) ){
return false;
}
}
Expand Down
26 changes: 26 additions & 0 deletions test/unit/helper/diffPlaces.js
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,32 @@ module.exports.tests.dedupe = function(test, common) {
t.end();
});

test('ZIP vs ZIP+4', function(t) {
  // build a USA address record; the two fixtures below agree on house
  // number and street and differ only in the form of the zip code
  const buildRecord = (zip) => ({
    parent: {
      country_a: ['USA']
    },
    address_parts: {
      number: '1',
      street: 'Main Street',
      zip: zip
    }
  });

  const plainZip = buildRecord('90210');
  const zipPlusFour = buildRecord('90210-1111');

  // a 5-digit ZIP and the ZIP+4 form of the same code must not be
  // reported as different
  t.false(isDifferent(plainZip, zipPlusFour), 'should be the same');
  t.end();
});

test('completely empty objects', function(t) {
var item1 = {};
var item2 = {};
Expand Down

0 comments on commit d6d34d8

Please sign in to comment.