-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package usaddress | ||
|
||
import ( | ||
"github.com/xrash/smetrics" | ||
) | ||
|
||
func stringSimilarity(s1, s2 string) float64 { | ||
return smetrics.JaroWinkler(s1, s2, 0.1, 4) | ||
} | ||
|
||
// weights holds one multiplier per address component. Each multiplier
// scales that component's string similarity when two addresses are compared.
type weights struct {
	// Primary number, street name and ZIP code.
	PrimaryNumber, StreetName, ZIPCode float64

	// Locality.
	City, State float64

	// Street suffix and directionals.
	StreetSuffix, StreetPredir, StreetPostdir float64

	// Secondary unit and ZIP+4 add-on.
	SecondaryUnit, Plus4 float64

	// Alternative delivery types.
	POBox, RuralRoute, HighwayContract float64
}
|
||
// Default weights | ||
// | ||
// PrimaryNumber, StreetName, and ZIPCode are given the highest weights because they are critical for identifying a specific location. | ||
// City and State are important but less specific than ZIPCode. | ||
// StreetSuffix, StreetPredir, and StreetPostdir have lower weights as they often have less impact on mail delivery. | ||
// SecondaryUnit and Plus4 are less critical and have the lowest weights. | ||
// POBox, RuralRoute, and HighwayContract are alternative address types and replace the street address components when present. | ||
var defaultWeights = weights{ | ||
PrimaryNumber: 0.25, | ||
StreetName: 0.25, | ||
ZIPCode: 0.20, | ||
City: 0.10, | ||
State: 0.05, | ||
StreetSuffix: 0.05, | ||
StreetPredir: 0.025, | ||
StreetPostdir: 0.025, | ||
SecondaryUnit: 0.025, | ||
Plus4: 0.025, | ||
POBox: 0.25, | ||
RuralRoute: 0.25, | ||
HighwayContract: 0.25, | ||
} | ||
|
||
func (a Address) Similarity(other Address) float64 { | ||
var totalWeight float64 | ||
var similarityScore float64 | ||
|
||
// Function to add similarity and weight | ||
addSimilarity := func(weight float64, s1, s2 string) { | ||
sim := stringSimilarity(s1, s2) | ||
similarityScore += sim * weight | ||
totalWeight += weight | ||
} | ||
|
||
// Check for POBox, RuralRoute, or HighwayContract | ||
if a.POBox != "" && other.POBox != "" { | ||
addSimilarity(defaultWeights.POBox, a.POBox, other.POBox) | ||
} else if a.RuralRoute != "" && other.RuralRoute != "" { | ||
addSimilarity(defaultWeights.RuralRoute, a.RuralRoute, other.RuralRoute) | ||
} else if a.HighwayContract != "" && other.HighwayContract != "" { | ||
addSimilarity(defaultWeights.HighwayContract, a.HighwayContract, other.HighwayContract) | ||
} else { | ||
// Compare street address components | ||
addSimilarity(defaultWeights.PrimaryNumber, a.PrimaryNumber, other.PrimaryNumber) | ||
addSimilarity(defaultWeights.StreetPredir, a.StreetPredir, other.StreetPredir) | ||
addSimilarity(defaultWeights.StreetName, a.StreetName, other.StreetName) | ||
addSimilarity(defaultWeights.StreetSuffix, a.StreetSuffix, other.StreetSuffix) | ||
addSimilarity(defaultWeights.StreetPostdir, a.StreetPostdir, other.StreetPostdir) | ||
addSimilarity(defaultWeights.SecondaryUnit, a.SecondaryUnit, other.SecondaryUnit) | ||
} | ||
|
||
// Compare City, State, ZIPCode, Plus4 | ||
addSimilarity(defaultWeights.City, a.City, other.City) | ||
addSimilarity(defaultWeights.State, a.State, other.State) | ||
addSimilarity(defaultWeights.ZIPCode, a.ZIPCode, other.ZIPCode) | ||
if a.Plus4 != "" || other.Plus4 != "" { | ||
addSimilarity(defaultWeights.Plus4, a.Plus4, other.Plus4) | ||
} | ||
|
||
if totalWeight == 0 { | ||
return 0.0 | ||
} | ||
return similarityScore / totalWeight | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
package usaddress | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestAddressSimilarity(t *testing.T) { | ||
baseAddress := Address{ | ||
PrimaryNumber: "123", | ||
StreetPredir: "N", | ||
StreetName: "MAIN", | ||
StreetSuffix: "ST", | ||
StreetPostdir: "", | ||
SecondaryUnit: "APT 4B", | ||
City: "ANYTOWN", | ||
State: "CA", | ||
ZIPCode: "90210", | ||
Plus4: "1234", | ||
} | ||
|
||
tests := []struct { | ||
name string | ||
addr1 Address | ||
addr2 Address | ||
expected float64 | ||
}{ | ||
{ | ||
name: "Identical Addresses", | ||
addr1: baseAddress, | ||
addr2: baseAddress, | ||
expected: 1.0, // Expect full similarity | ||
}, | ||
{ | ||
name: "Different Primary Number", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.PrimaryNumber = "124" | ||
return a | ||
}(), | ||
expected: 0.956, | ||
}, | ||
{ | ||
name: "Different Street Name", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.StreetName = "MAINN" | ||
return a | ||
}(), | ||
expected: 0.99, | ||
}, | ||
{ | ||
name: "Different ZIPCode", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.ZIPCode = "90211" | ||
return a | ||
}(), | ||
expected: 0.984, | ||
}, | ||
{ | ||
name: "Different City", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.City = "OTHERTOWN" | ||
return a | ||
}(), | ||
expected: 0.967, | ||
}, | ||
{ | ||
name: "Different State", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.State = "NY" | ||
return a | ||
}(), | ||
expected: 0.95, | ||
}, | ||
{ | ||
name: "Different Street Suffix", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.StreetSuffix = "AVE" | ||
return a | ||
}(), | ||
expected: 0.95, | ||
}, | ||
{ | ||
name: "Different StreetPredir", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.StreetPredir = "S" | ||
return a | ||
}(), | ||
expected: 0.975, | ||
}, | ||
{ | ||
name: "Different StreetPostdir", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.StreetPostdir = "NW" | ||
return a | ||
}(), | ||
expected: 0.975, | ||
}, | ||
{ | ||
name: "Different Secondary Unit", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.SecondaryUnit = "APT 5C" | ||
return a | ||
}(), | ||
expected: 0.997, | ||
}, | ||
{ | ||
name: "Different Plus4", | ||
addr1: baseAddress, | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.Plus4 = "5678" | ||
return a | ||
}(), | ||
expected: 0.975, | ||
}, | ||
{ | ||
name: "POBox Instead of Street Address", | ||
addr1: func() Address { | ||
a := baseAddress | ||
a.POBox = "PO BOX 789" | ||
// Clear street address components | ||
a.PrimaryNumber = "" | ||
a.StreetName = "" | ||
a.StreetSuffix = "" | ||
a.StreetPredir = "" | ||
a.StreetPostdir = "" | ||
a.SecondaryUnit = "" | ||
return a | ||
}(), | ||
addr2: func() Address { | ||
a := baseAddress | ||
a.POBox = "PO BOX 789" | ||
// Clear street address components | ||
a.PrimaryNumber = "" | ||
a.StreetName = "" | ||
a.StreetSuffix = "" | ||
a.StreetPredir = "" | ||
a.StreetPostdir = "" | ||
a.SecondaryUnit = "" | ||
return a | ||
}(), | ||
expected: 1.0, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
similarity := tt.addr1.Similarity(tt.addr2) | ||
require.InDelta(t, tt.expected, similarity, 0.01) | ||
}) | ||
} | ||
} |