@@ -227,28 +227,73 @@ def yield_lookups(self, name: TokenRange, address: List[TokenRange])\
227
227
if name_fulls :
228
228
fulls_count = sum (t .count for t in name_fulls )
229
229
if len (name_partials ) == 1 :
230
- penalty += min (1 , max (0 , (exp_count - 50 * fulls_count ) / (1000 * fulls_count )))
231
- # At this point drop unindexed partials from the address.
232
- # This might yield wrong results, nothing we can do about that.
233
- if not partials_indexed :
234
- addr_tokens = [t .token for t in addr_partials if t .is_indexed ]
230
+ penalty += min (0.5 , max (0 , (exp_count - 50 * fulls_count ) / (2000 * fulls_count )))
231
+ if partials_indexed :
235
232
penalty += 1.2 * sum (t .penalty for t in addr_partials if not t .is_indexed )
236
- # Any of the full names applies with all of the partials from the address
237
- yield penalty , fulls_count / (2 ** len (addr_tokens )),\
238
- dbf .lookup_by_any_name ([t .token for t in name_fulls ],
239
- addr_tokens ,
240
- fulls_count > 30000 / max (1 , len (addr_tokens )))
233
+
234
+ yield penalty ,fulls_count / (2 ** len (addr_tokens )), \
235
+ self .get_full_name_ranking (name_fulls , addr_partials ,
236
+ fulls_count > 30000 / max (1 , len (addr_tokens )))
241
237
242
238
# To catch remaining results, lookup by name and address
243
239
# We only do this if there is a reasonable number of results expected.
244
240
exp_count = exp_count / (2 ** len (addr_tokens )) if addr_tokens else exp_count
245
241
if exp_count < 10000 and all (t .is_indexed for t in name_partials .values ()):
246
- lookup = [dbf .FieldLookup ('name_vector' , list (name_partials .keys ()), lookups .LookupAll )]
247
- if addr_tokens :
248
- lookup .append (dbf .FieldLookup ('nameaddress_vector' , addr_tokens , lookups .LookupAll ))
249
242
penalty += 0.35 * max (1 if name_fulls else 0.1 ,
250
243
5 - len (name_partials ) - len (addr_tokens ))
251
- yield penalty , exp_count , lookup
244
+ yield penalty , exp_count ,\
245
+ self .get_name_address_ranking (list (name_partials .keys ()), addr_partials )
246
+
247
+
248
def get_name_address_ranking(self, name_tokens: List[int],
                             addr_partials: List[Token]) -> List[dbf.FieldLookup]:
    """ Build the list of field lookups for a search by name and address.

        The result always starts with a 'name_vector' lookup over
        'name_tokens' using lookups.LookupAll. Address partials that are
        not indexed are ignored. The remaining ones are split by their
        'addr_count': tokens with a count above 20000 are added as a
        'nameaddress_vector' lookup of type lookups.Restrict, all others
        as a 'nameaddress_vector' lookup of type lookups.LookupAll.
        A lookup is only added when its token list is non-empty.
    """
    ranking = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]

    frequent_tokens: List[int] = []
    rare_tokens: List[int] = []
    for partial in addr_partials:
        if not partial.is_indexed:
            continue
        # Sort the token into the bucket matching its address frequency.
        bucket = frequent_tokens if partial.addr_count > 20000 else rare_tokens
        bucket.append(partial.token)

    if frequent_tokens:
        restrict = dbf.FieldLookup('nameaddress_vector', frequent_tokens, lookups.Restrict)
        ranking.append(restrict)
    if rare_tokens:
        lookup_all = dbf.FieldLookup('nameaddress_vector', rare_tokens, lookups.LookupAll)
        ranking.append(lookup_all)

    return ranking
271
+
272
+
273
def get_full_name_ranking(self, name_fulls: List[Token], addr_partials: List[Token],
                          use_lookup: bool) -> List[dbf.FieldLookup]:
    """ Build a ranking expression from full name terms plus address terms.

        Unindexed address partials are always dropped, which might yield
        wrong results but cannot be avoided here. When 'use_lookup' is
        true, indexed partials with an 'addr_count' of at most 20000 are
        passed on as lookup tokens and the rest as restrict tokens.
        When 'use_lookup' is false, every indexed partial becomes a
        restrict token and the list of lookup tokens stays empty.

        The result is whatever dbf.lookup_by_any_name() returns for the
        full name tokens and the two address token lists.
    """
    restrict_tokens: List[int] = []
    lookup_tokens: List[int] = []

    for partial in addr_partials:
        if not partial.is_indexed:
            continue
        # Only consult the address count when lookups are requested at all.
        if use_lookup and not partial.addr_count > 20000:
            lookup_tokens.append(partial.token)
        else:
            restrict_tokens.append(partial.token)

    full_tokens = [full.token for full in name_fulls]
    return dbf.lookup_by_any_name(full_tokens, restrict_tokens, lookup_tokens)
252
297
253
298
254
299
def get_name_ranking (self , trange : TokenRange ,
0 commit comments