Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Append unclassified tokens to the street #28

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions classifier/scheme/street_name.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@ module.exports = [
}
]
},
{
// du 4 septembre
confidence: 0.5,
Class: StreetNameClassification,
scheme: [
{
is: ['StopWordClassification']
},
{
is: ['NumericClassification'],
not: ['PostcodeClassification']
},
{
is: ['AlphaClassification'],
not: ['StreetClassification', 'IntersectionClassification', 'LocalityClassification']
}
]
},
{
// dos Fiéis de Deus
confidence: 0.5,
Expand Down
4 changes: 3 additions & 1 deletion parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const TokenDistanceFilter = require('../solver/TokenDistanceFilter')
const MustNotPreceedFilter = require('../solver/MustNotPreceedFilter')
const MustNotFollowFilter = require('../solver/MustNotFollowFilter')
const SubsetFilter = require('../solver/SubsetFilter')
const FillSolver = require('../solver/FillSolver')

class AddressParser extends Parser {
constructor (options) {
Expand Down Expand Up @@ -108,7 +109,8 @@ class AddressParser extends Parser {
new MustNotFollowFilter('LocalityClassification', 'RegionClassification'),
new MustNotFollowFilter('LocalityClassification', 'CountryClassification'),
new TokenDistanceFilter(),
new SubsetFilter()
new SubsetFilter(),
new FillSolver()
],
options
)
Expand Down
29 changes: 29 additions & 0 deletions solver/FillSolver.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
 * Solver step that widens street spans over tokens a solution left out.
 *
 * For each solution, every node returned by `section.graph.findAll('child')`
 * that no solution pair intersects is treated as "missing". A missing node is
 * merged into a street when all of the following hold:
 *  - a street pair's span ends exactly one position before the node starts
 *    (presumably a single separator character lies in between; confirm
 *    against the tokenizer),
 *  - the node returned by `findOne('child:first')` on that street span's graph
 *    carries a StreetPrefixClassification,
 *  - the missing node does not carry an EndTokenClassification,
 *  - one of the prefix's 'parent' nodes starts at the prefix and ends at the
 *    missing node.
 * When they do, that parent node replaces the street pair's span.
 */
class FillSolver {
  /**
   * Extends street spans in place; nothing is added to or removed from
   * `solution.pair`.
   *
   * @param {object} tokenizer - object exposing `solution` (array of
   *   `{ pair: [{ span, classification }] }`) and `section` (array of
   *   `{ graph }`).
   * @returns {undefined}
   */
  solve (tokenizer) {
    tokenizer.solution.forEach(solution => {
      // Street pairs of this solution (matched by classification class name)
      const streets = solution.pair.filter(p => p.classification.constructor.name === 'StreetClassification')

      // Every child node, across all sections, that no pair of this solution covers
      const missings = tokenizer.section.reduce((acc, section) => {
        const uncovered = section.graph.findAll('child').filter(c => !solution.pair.some(p => p.span.intersects(c)))
        return acc.concat(uncovered)
      }, [])

      // Nodes are handled in order, and `street.span` is re-read on every
      // iteration, so a span extended here is what the next node is compared to.
      missings.forEach(missing => {
        const street = streets.find(s => s.span.end === missing.start - 1)
        if (!street) return

        // Only streets that begin with a street prefix are extended
        const prefix = street.span.graph.findOne('child:first')
        if (!prefix || !prefix.classifications.StreetPrefixClassification) return

        // The missing span should not be an end token
        if (missing.classifications.EndTokenClassification) return

        const span = prefix.graph.findAll('parent').find(phrase => phrase.start === prefix.start && phrase.end === missing.end)

        // `street` is the very object stored in `solution.pair`, so it can be
        // updated directly; no index lookup or bounds check is needed.
        if (span) {
          street.span = span
        }
      })
    })
  }
}

// Export the class itself; parser/AddressParser.js requires this module and instantiates it with `new FillSolver()`.
module.exports = FillSolver
20 changes: 20 additions & 0 deletions test/address.fra.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,26 @@ const testcase = (test, common) => {
assert(`Rue de l'Adjudant Réau Paris`, [
{ street: `Rue de l'Adjudant Réau` }, { locality: 'Paris' }
])

assert('16 Rue Des Petits Carreaux', [
{ housenumber: '16' }, { street: 'Rue Des Petits' }
])

assert('16 Rue Des Petits Carreaux Paris', [
{ housenumber: '16' }, { street: 'Rue Des Petits Carreaux' }, { locality: 'Paris' }
])

assert('Rue Saint-Germain Dou', [
{ street: 'Rue Saint-Germain' }
])

assert('Rue Saint-Germain Dourdan', [
{ street: 'Rue Saint-Germain' }, { locality: 'Dourdan' }
])

assert('Rue du 8 Mai Chevreuse', [
{ street: 'Rue du 8 Mai' }, { locality: 'Chevreuse' }
])
}

module.exports.all = (tape, common) => {
Expand Down