From 946061e6c284e82ca9bb1c61060b32858608a0a3 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Wed, 22 Apr 2020 16:02:59 -0700 Subject: [PATCH] Use match instead of match_phrase query for autocomplete `match_phrase` is now a should query --- query/autocomplete.js | 4 ++- query/autocomplete_defaults.js | 1 + query/view/match_first_tokens_only.js | 28 +++++++++++++++++++ .../fixture/autocomplete_custom_boosts.json | 15 ++++++++-- .../autocomplete_linguistic_final_token.js | 13 +++++++-- ...autocomplete_linguistic_multiple_tokens.js | 17 ++++++++--- ...uistic_multiple_tokens_complete_numeric.js | 15 ++++++++-- .../autocomplete_linguistic_with_admin.js | 14 ++++++++-- .../autocomplete_single_character_street.js | 16 ++++++++--- ...utocomplete_token_matching_permutations.js | 22 ++++++++++----- 10 files changed, 119 insertions(+), 26 deletions(-) create mode 100644 query/view/match_first_tokens_only.js diff --git a/query/autocomplete.js b/query/autocomplete.js index 527dea03b..79e85aecc 100644 --- a/query/autocomplete.js +++ b/query/autocomplete.js @@ -14,6 +14,7 @@ var views = { admin_multi_match_first: require('./view/admin_multi_match_first'), admin_multi_match_last: require('./view/admin_multi_match_last'), phrase_first_tokens_only: require('./view/phrase_first_tokens_only'), + match_first_tokens_only: require('./view/match_first_tokens_only'), boost_exact_matches: require('./view/boost_exact_matches'), max_character_count_layer_filter: require('./view/max_character_count_layer_filter'), focus_point_filter: require('./view/focus_point_distance_filter') @@ -40,7 +41,7 @@ adminFields = adminFields.concat(['add_name_to_multimatch']); var query = new peliasQuery.layout.FilteredBooleanQuery(); // mandatory matches -query.score( views.phrase_first_tokens_only, 'must' ); +query.score( views.match_first_tokens_only, 'must' ); query.score( views.ngrams_last_token_only_multi( adminFields ), 'must' ); // admin components @@ -54,6 +55,7 @@ query.score( 
peliasQuery.view.address('cross_street') ); query.score( peliasQuery.view.address('postcode') ); // scoring boost +query.score( views.phrase_first_tokens_only, 'should' ); query.score( peliasQuery.view.focus( views.ngrams_strict ) ); query.score( peliasQuery.view.popularity( peliasQuery.view.leaf.match_all ) ); query.score( peliasQuery.view.population( peliasQuery.view.leaf.match_all ) ); diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index c05a2182e..e131548ea 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -20,6 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'ngram:field': 'name.default', 'ngram:boost': 100, 'ngram:cutoff_frequency': 0.01, + 'ngram:minimum_should_match': '1<-1 3<-25%', 'phrase:analyzer': 'peliasQuery', 'phrase:field': 'phrase.default', diff --git a/query/view/match_first_tokens_only.js b/query/view/match_first_tokens_only.js new file mode 100644 index 000000000..7c9633053 --- /dev/null +++ b/query/view/match_first_tokens_only.js @@ -0,0 +1,28 @@ +const peliasQuery = require('pelias-query'); + +/** + Match view which uses only the *complete* tokens of 'input:name' (typically ALL BUT the last token). + + eg. if the input was "100 foo str", then the input matched by this view would only be '100 foo' + note: it is assumed that the rest of the input is matched using another view. +**/ + +module.exports = function( vs ){ + const view_name = 'match_first_tokens_only'; + + // get a copy of the *complete* tokens produced from the input:name + const tokens = vs.var('input:name:tokens_complete').get(); + + // no valid tokens to use, fail now, don't render this view. 
+ if( !tokens || tokens.length < 1 ){ return null; } + + // set the 'input' variable to all but the last token + vs.var(`match:${view_name}:input`).set( tokens.join(' ') ); + vs.var(`match:${view_name}:field`).set(vs.var('phrase:field').get()); + + vs.var(`match:${view_name}:analyzer`).set(vs.var('phrase:analyzer').get()); + vs.var(`match:${view_name}:boost`).set(vs.var('phrase:boost').get()); + vs.var(`match:${view_name}:minimum_should_match`).set(vs.var('ngram:minimum_should_match').get()); + + return peliasQuery.view.leaf.match(view_name)( vs ); +}; diff --git a/test/unit/fixture/autocomplete_custom_boosts.json b/test/unit/fixture/autocomplete_custom_boosts.json index b9a327e10..061ed3c0b 100644 --- a/test/unit/fixture/autocomplete_custom_boosts.json +++ b/test/unit/fixture/autocomplete_custom_boosts.json @@ -5,18 +5,27 @@ "bool": { "must": [ { - "match_phrase": { + "match": { "phrase.default": { "analyzer": "peliasQuery", "boost": 1, - "slop": 3, - "query": "foo" + "query": "foo", + "minimum_should_match": "1<-1 3<-25%" } } } ], "should": [ { + "match_phrase": { + "phrase.default": { + "analyzer": "peliasQuery", + "boost": 1, + "slop": 3, + "query": "foo" + } + } + }, { "function_score": { "query": { "match_all": {} diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js index 66da71b78..189ce2b2b 100644 --- a/test/unit/fixture/autocomplete_linguistic_final_token.js +++ b/test/unit/fixture/autocomplete_linguistic_final_token.js @@ -2,16 +2,25 @@ module.exports = { 'query': { 'bool': { 'must': [{ - 'match_phrase': { + 'match': { 'phrase.default': { 'analyzer': 'peliasQuery', 'boost': 1, - 'slop': 3, + 'minimum_should_match': '1<-1 3<-25%', 'query': 'one' } } }], 'should':[{ + 'match_phrase': { + 'phrase.default': { + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3, + 'query': 'one' + } + } + }, { 'function_score': { 'query': { 'match_all': {} diff --git 
a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js index 5b4fb60d1..2361fd6de 100644 --- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js @@ -2,11 +2,11 @@ module.exports = { 'query': { 'bool': { 'must': [{ - 'match_phrase': { + 'match': { 'phrase.default': { 'analyzer': 'peliasQuery', 'boost': 1, - 'slop': 3, + 'minimum_should_match': '1<-1 3<-25%', 'query': 'one two' } } @@ -37,9 +37,18 @@ module.exports = { } } }], - 'should':[ + 'should':[{ + 'match_phrase': { + 'phrase.default': { + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3, + 'query': 'one two' + } + } + }, { - 'function_score': { + 'function_score': { 'query': { 'match_all': {} }, diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js index 92471ba24..548417521 100644 --- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js +++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js @@ -2,11 +2,11 @@ module.exports = { 'query': { 'bool': { 'must': [{ - 'match_phrase': { + 'match': { 'phrase.default': { 'analyzer': 'peliasQuery', 'boost': 1, - 'slop': 3, + 'minimum_should_match': '1<-1 3<-25%', 'query': '1 2' } } @@ -25,7 +25,16 @@ module.exports = { } } }], - 'should': [ + 'should': [{ + 'match_phrase': { + 'phrase.default': { + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3, + 'query': '1 2' + } + } + }, { 'function_score': { 'query': { diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js index b198d7f14..69636125a 100644 --- a/test/unit/fixture/autocomplete_linguistic_with_admin.js +++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js @@ -3,11 +3,11 @@ module.exports = { 'bool': { 'must': [ { 
- 'match_phrase': { + 'match': { 'phrase.default': { 'analyzer': 'peliasQuery', 'boost': 1, - 'slop': 3, + 'minimum_should_match': '1<-1 3<-25%', 'query': 'one two' } } @@ -36,6 +36,16 @@ module.exports = { } ], 'should': [ + { + 'match_phrase': { + 'phrase.default': { + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3, + 'query': 'one two' + } + } + }, { 'function_score': { 'query': { diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js index dedacdc34..0f8be69ab 100644 --- a/test/unit/fixture/autocomplete_single_character_street.js +++ b/test/unit/fixture/autocomplete_single_character_street.js @@ -2,11 +2,11 @@ module.exports = { 'query': { 'bool': { 'must': [{ - 'match_phrase': { + 'match': { 'phrase.default': { 'analyzer': 'peliasQuery', 'boost': 1, - 'slop': 3, + 'minimum_should_match': '1<-1 3<-25%', 'query': 'k road' } } @@ -32,8 +32,7 @@ module.exports = { 'type': 'cross_fields' } }], - 'should':[ - { + 'should':[{ 'match': { 'address_parts.street': { 'query': 'k road', @@ -44,6 +43,15 @@ module.exports = { } }, { + 'match_phrase': { + 'phrase.default': { + 'analyzer': 'peliasQuery', + 'boost': 1, + 'slop': 3, + 'query': 'k road' + } + } + }, { 'function_score': { 'query': { 'match_all': {} diff --git a/test/unit/query/autocomplete_token_matching_permutations.js b/test/unit/query/autocomplete_token_matching_permutations.js index 1b9b1e170..11c2276ac 100644 --- a/test/unit/query/autocomplete_token_matching_permutations.js +++ b/test/unit/query/autocomplete_token_matching_permutations.js @@ -21,6 +21,7 @@ const defaults = new peliasQuery.Vars( require('../../../query/autocomplete_defa const views = { ngrams_last_token_only: require('../../../query/view/ngrams_last_token_only'), ngrams_last_token_only_multi: require('../../../query/view/ngrams_last_token_only_multi')(adminFields), + match_first_tokens_only: require('../../../query/view/match_first_tokens_only'), 
phrase_first_tokens_only: require('../../../query/view/phrase_first_tokens_only'), }; @@ -84,8 +85,9 @@ module.exports.tests.single_token = function(test, common) { var vs = vars( clean ); assert( t, generate( clean ), { - must: [ views.phrase_first_tokens_only( vs ) ], + must: [ views.match_first_tokens_only( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ] @@ -124,8 +126,9 @@ module.exports.tests.single_token = function(test, common) { var vs = vars( clean ); assert( t, generate( clean ), { - must: [ views.phrase_first_tokens_only( vs ) ], + must: [ views.match_first_tokens_only( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ] @@ -164,8 +167,9 @@ module.exports.tests.single_token = function(test, common) { var vs = vars( clean ); assert( t, generate( clean ), { - must: [ views.phrase_first_tokens_only( vs ) ], + must: [ views.match_first_tokens_only( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ] @@ -189,10 +193,11 @@ module.exports.tests.multiple_tokens = function(test, common) { assert( t, generate( clean ), { must: [ - views.phrase_first_tokens_only( vs ), + views.match_first_tokens_only( vs ), views.ngrams_last_token_only_multi( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ] @@ -212,9 +217,10 @@ module.exports.tests.multiple_tokens = function(test, common) { assert( t, generate( clean ), { must: [ - views.phrase_first_tokens_only( vs ) + 
views.match_first_tokens_only( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ] @@ -236,10 +242,11 @@ module.exports.tests.multiple_tokens = function(test, common) { assert( t, generate( clean ), { must: [ - views.phrase_first_tokens_only( vs ), + views.match_first_tokens_only( vs ), views.ngrams_last_token_only_multi( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ] @@ -259,9 +266,10 @@ module.exports.tests.multiple_tokens = function(test, common) { assert( t, generate( clean ), { must: [ - views.phrase_first_tokens_only( vs ) + views.match_first_tokens_only( vs ) ], should: [ + views.phrase_first_tokens_only( vs ), peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ), peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs ) ]