From 946061e6c284e82ca9bb1c61060b32858608a0a3 Mon Sep 17 00:00:00 2001
From: Julian Simioni <julian@simioni.org>
Date: Wed, 22 Apr 2020 16:02:59 -0700
Subject: [PATCH] Use match instead of match_phrase query for autocomplete

The `match_phrase` query is kept, but is now only a `should` (scoring) clause
---
 query/autocomplete.js                         |  4 ++-
 query/autocomplete_defaults.js                |  1 +
 query/view/match_first_tokens_only.js         | 28 +++++++++++++++++++
 .../fixture/autocomplete_custom_boosts.json   | 15 ++++++++--
 .../autocomplete_linguistic_final_token.js    | 13 +++++++--
 ...autocomplete_linguistic_multiple_tokens.js | 17 ++++++++---
 ...uistic_multiple_tokens_complete_numeric.js | 15 ++++++++--
 .../autocomplete_linguistic_with_admin.js     | 14 ++++++++--
 .../autocomplete_single_character_street.js   | 16 ++++++++---
 ...utocomplete_token_matching_permutations.js | 22 ++++++++++-----
 10 files changed, 119 insertions(+), 26 deletions(-)
 create mode 100644 query/view/match_first_tokens_only.js

diff --git a/query/autocomplete.js b/query/autocomplete.js
index 527dea03b..79e85aecc 100644
--- a/query/autocomplete.js
+++ b/query/autocomplete.js
@@ -14,6 +14,7 @@ var views = {
   admin_multi_match_first: require('./view/admin_multi_match_first'),
   admin_multi_match_last: require('./view/admin_multi_match_last'),
   phrase_first_tokens_only:   require('./view/phrase_first_tokens_only'),
+  match_first_tokens_only:   require('./view/match_first_tokens_only'),
   boost_exact_matches:        require('./view/boost_exact_matches'),
   max_character_count_layer_filter:   require('./view/max_character_count_layer_filter'),
   focus_point_filter:         require('./view/focus_point_distance_filter')
@@ -40,7 +41,7 @@ adminFields = adminFields.concat(['add_name_to_multimatch']);
 var query = new peliasQuery.layout.FilteredBooleanQuery();
 
 // mandatory matches
-query.score( views.phrase_first_tokens_only, 'must' );
+query.score( views.match_first_tokens_only, 'must' );
 query.score( views.ngrams_last_token_only_multi( adminFields ), 'must' );
 
 // admin components
@@ -54,6 +55,7 @@ query.score( peliasQuery.view.address('cross_street') );
 query.score( peliasQuery.view.address('postcode') );
 
 // scoring boost
+query.score( views.phrase_first_tokens_only, 'should' );
 query.score( peliasQuery.view.focus( views.ngrams_strict ) );
 query.score( peliasQuery.view.popularity( peliasQuery.view.leaf.match_all ) );
 query.score( peliasQuery.view.population( peliasQuery.view.leaf.match_all ) );
diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
index c05a2182e..e131548ea 100644
--- a/query/autocomplete_defaults.js
+++ b/query/autocomplete_defaults.js
@@ -20,6 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'ngram:field': 'name.default',
   'ngram:boost': 100,
   'ngram:cutoff_frequency': 0.01,
+  'ngram:minimum_should_match': '1<-1 3<-25%',
 
   'phrase:analyzer': 'peliasQuery',
   'phrase:field': 'phrase.default',
diff --git a/query/view/match_first_tokens_only.js b/query/view/match_first_tokens_only.js
new file mode 100644
index 000000000..7c9633053
--- /dev/null
+++ b/query/view/match_first_tokens_only.js
@@ -0,0 +1,28 @@
+const peliasQuery = require('pelias-query');
+
+/**
+  Match view which queries using only the *complete* tokens of 'input:name'
+  (field/analyzer/boost come from 'phrase:*', minimum_should_match from 'ngram:*').
+  eg. if the input was "100 foo str", the view's input would presumably be '100 foo'
+  note: it is assumed that the rest of the input is matched using another view.
+**/
+
+module.exports = function( vs ){
+  const view_name = 'match_first_tokens_only';
+
+  // read the *complete* tokens stored under 'input:name:tokens_complete'
+  const tokens = vs.var('input:name:tokens_complete').get();
+
+  // no valid tokens to use: return null so that this view is not rendered.
+  if( !tokens || tokens.length < 1 ){ return null; }
+
+  // set this view's 'input' variable to the complete tokens, joined by spaces
+  vs.var(`match:${view_name}:input`).set( tokens.join(' ') );
+  vs.var(`match:${view_name}:field`).set(vs.var('phrase:field').get());
+
+  vs.var(`match:${view_name}:analyzer`).set(vs.var('phrase:analyzer').get());
+  vs.var(`match:${view_name}:boost`).set(vs.var('phrase:boost').get());
+  vs.var(`match:${view_name}:minimum_should_match`).set(vs.var('ngram:minimum_should_match').get());
+
+  return peliasQuery.view.leaf.match(view_name)( vs );
+};
diff --git a/test/unit/fixture/autocomplete_custom_boosts.json b/test/unit/fixture/autocomplete_custom_boosts.json
index b9a327e10..061ed3c0b 100644
--- a/test/unit/fixture/autocomplete_custom_boosts.json
+++ b/test/unit/fixture/autocomplete_custom_boosts.json
@@ -5,18 +5,27 @@
       "bool": {
         "must": [
           {
-            "match_phrase": {
+            "match": {
               "phrase.default": {
                 "analyzer": "peliasQuery",
                 "boost": 1,
-                "slop": 3,
-                "query": "foo"
+                "query": "foo",
+                "minimum_should_match": "1<-1 3<-25%"
               }
             }
           }
         ],
         "should": [
           {
+            "match_phrase": {
+              "phrase.default": {
+                "analyzer": "peliasQuery",
+                "boost": 1,
+                "slop": 3,
+                "query": "foo"
+              }
+            }
+          }, {
             "function_score": {
               "query": {
                 "match_all": {}
diff --git a/test/unit/fixture/autocomplete_linguistic_final_token.js b/test/unit/fixture/autocomplete_linguistic_final_token.js
index 66da71b78..189ce2b2b 100644
--- a/test/unit/fixture/autocomplete_linguistic_final_token.js
+++ b/test/unit/fixture/autocomplete_linguistic_final_token.js
@@ -2,16 +2,25 @@ module.exports = {
   'query': {
     'bool': {
       'must': [{
-        'match_phrase': {
+        'match': {
           'phrase.default': {
             'analyzer': 'peliasQuery',
             'boost': 1,
-            'slop': 3,
+            'minimum_should_match': '1<-1 3<-25%',
             'query': 'one'
           }
         }
       }],
       'should':[{
+        'match_phrase': {
+          'phrase.default': {
+            'analyzer': 'peliasQuery',
+            'boost': 1,
+            'slop': 3,
+            'query': 'one'
+          }
+        }
+      }, {
         'function_score': {
           'query': {
             'match_all': {}
diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
index 5b4fb60d1..2361fd6de 100644
--- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
+++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
@@ -2,11 +2,11 @@ module.exports = {
   'query': {
     'bool': {
       'must': [{
-        'match_phrase': {
+        'match': {
           'phrase.default': {
             'analyzer': 'peliasQuery',
             'boost': 1,
-            'slop': 3,
+            'minimum_should_match': '1<-1 3<-25%',
             'query': 'one two'
           }
         }
@@ -37,9 +37,18 @@ module.exports = {
           }
         }
       }],
-      'should':[
+      'should':[{
+          'match_phrase': {
+            'phrase.default': {
+              'analyzer': 'peliasQuery',
+              'boost': 1,
+              'slop': 3,
+              'query': 'one two'
+            }
+          }
+        },
         {
-        'function_score': {
+          'function_score': {
           'query': {
             'match_all': {}
           },
diff --git a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js
index 92471ba24..548417521 100644
--- a/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js
+++ b/test/unit/fixture/autocomplete_linguistic_multiple_tokens_complete_numeric.js
@@ -2,11 +2,11 @@ module.exports = {
   'query': {
     'bool': {
       'must': [{
-        'match_phrase': {
+        'match': {
           'phrase.default': {
             'analyzer': 'peliasQuery',
             'boost': 1,
-            'slop': 3,
+            'minimum_should_match': '1<-1 3<-25%',
             'query': '1 2'
           }
         }
@@ -25,7 +25,16 @@ module.exports = {
           }
         }
       }],
-      'should': [
+      'should': [{
+          'match_phrase': {
+            'phrase.default': {
+              'analyzer': 'peliasQuery',
+              'boost': 1,
+              'slop': 3,
+              'query': '1 2'
+            }
+          }
+        },
         {
           'function_score': {
             'query': {
diff --git a/test/unit/fixture/autocomplete_linguistic_with_admin.js b/test/unit/fixture/autocomplete_linguistic_with_admin.js
index b198d7f14..69636125a 100644
--- a/test/unit/fixture/autocomplete_linguistic_with_admin.js
+++ b/test/unit/fixture/autocomplete_linguistic_with_admin.js
@@ -3,11 +3,11 @@ module.exports = {
     'bool': {
       'must': [
         {
-          'match_phrase': {
+          'match': {
             'phrase.default': {
               'analyzer': 'peliasQuery',
               'boost': 1,
-              'slop': 3,
+              'minimum_should_match': '1<-1 3<-25%',
               'query': 'one two'
             }
           }
@@ -36,6 +36,16 @@ module.exports = {
         }
       ],
       'should': [
+        {
+          'match_phrase': {
+            'phrase.default': {
+              'analyzer': 'peliasQuery',
+              'boost': 1,
+              'slop': 3,
+              'query': 'one two'
+            }
+          }
+        },
         {
           'function_score': {
             'query': {
diff --git a/test/unit/fixture/autocomplete_single_character_street.js b/test/unit/fixture/autocomplete_single_character_street.js
index dedacdc34..0f8be69ab 100644
--- a/test/unit/fixture/autocomplete_single_character_street.js
+++ b/test/unit/fixture/autocomplete_single_character_street.js
@@ -2,11 +2,11 @@ module.exports = {
   'query': {
     'bool': {
       'must': [{
-        'match_phrase': {
+        'match': {
           'phrase.default': {
             'analyzer': 'peliasQuery',
             'boost': 1,
-            'slop': 3,
+            'minimum_should_match': '1<-1 3<-25%',
             'query': 'k road'
           }
         }
@@ -32,8 +32,7 @@ module.exports = {
           'type': 'cross_fields'
         }
       }],
-      'should':[
-        {
+      'should':[{
           'match': {
             'address_parts.street': {
               'query': 'k road',
@@ -44,6 +43,15 @@ module.exports = {
           }
         },
         {
+        'match_phrase': {
+          'phrase.default': {
+            'analyzer': 'peliasQuery',
+            'boost': 1,
+            'slop': 3,
+            'query': 'k road'
+          }
+        }
+      }, {
         'function_score': {
           'query': {
             'match_all': {}
diff --git a/test/unit/query/autocomplete_token_matching_permutations.js b/test/unit/query/autocomplete_token_matching_permutations.js
index 1b9b1e170..11c2276ac 100644
--- a/test/unit/query/autocomplete_token_matching_permutations.js
+++ b/test/unit/query/autocomplete_token_matching_permutations.js
@@ -21,6 +21,7 @@ const defaults = new peliasQuery.Vars( require('../../../query/autocomplete_defa
 const views = {
   ngrams_last_token_only:     require('../../../query/view/ngrams_last_token_only'),
   ngrams_last_token_only_multi: require('../../../query/view/ngrams_last_token_only_multi')(adminFields),
+  match_first_tokens_only:   require('../../../query/view/match_first_tokens_only'),
   phrase_first_tokens_only:   require('../../../query/view/phrase_first_tokens_only'),
 };
 
@@ -84,8 +85,9 @@ module.exports.tests.single_token = function(test, common) {
     var vs = vars( clean );
 
     assert( t, generate( clean ), {
-      must: [ views.phrase_first_tokens_only( vs ) ],
+      must: [ views.match_first_tokens_only( vs ) ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]
@@ -124,8 +126,9 @@ module.exports.tests.single_token = function(test, common) {
     var vs = vars( clean );
 
     assert( t, generate( clean ), {
-      must: [ views.phrase_first_tokens_only( vs ) ],
+      must: [ views.match_first_tokens_only( vs ) ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]
@@ -164,8 +167,9 @@ module.exports.tests.single_token = function(test, common) {
     var vs = vars( clean );
 
     assert( t, generate( clean ), {
-      must: [ views.phrase_first_tokens_only( vs ) ],
+      must: [ views.match_first_tokens_only( vs ) ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]
@@ -189,10 +193,11 @@ module.exports.tests.multiple_tokens = function(test, common) {
 
     assert( t, generate( clean ), {
       must: [
-        views.phrase_first_tokens_only( vs ),
+        views.match_first_tokens_only( vs ),
         views.ngrams_last_token_only_multi( vs )
       ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]
@@ -212,9 +217,10 @@ module.exports.tests.multiple_tokens = function(test, common) {
 
     assert( t, generate( clean ), {
       must: [
-        views.phrase_first_tokens_only( vs )
+        views.match_first_tokens_only( vs )
       ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]
@@ -236,10 +242,11 @@ module.exports.tests.multiple_tokens = function(test, common) {
 
     assert( t, generate( clean ), {
       must: [
-        views.phrase_first_tokens_only( vs ),
+        views.match_first_tokens_only( vs ),
         views.ngrams_last_token_only_multi( vs )
       ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]
@@ -259,9 +266,10 @@ module.exports.tests.multiple_tokens = function(test, common) {
 
     assert( t, generate( clean ), {
       must: [
-        views.phrase_first_tokens_only( vs )
+        views.match_first_tokens_only( vs )
       ],
       should: [
+        views.phrase_first_tokens_only( vs ),
         peliasQuery.view.popularity( peliasQuery.view.leaf.match_all )( vs ),
         peliasQuery.view.population( peliasQuery.view.leaf.match_all )( vs )
       ]