From c6b174a3275dfe547386f671c0824c9e85b84de5 Mon Sep 17 00:00:00 2001 From: Sep Dehpour Date: Tue, 29 Jan 2019 23:59:44 -0800 Subject: [PATCH] dealing with full stop words --- fast_autocomplete/__init__.py | 2 +- fast_autocomplete/dwg.py | 30 +++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fast_autocomplete/__init__.py b/fast_autocomplete/__init__.py index 1ff89d1..64adae2 100644 --- a/fast_autocomplete/__init__.py +++ b/fast_autocomplete/__init__.py @@ -1,5 +1,5 @@ # flake8: noqa -__version__ = '0.2.4' +__version__ = '0.2.5' import sys pyversion = float(sys.version[:3]) if pyversion < 3.6: diff --git a/fast_autocomplete/dwg.py b/fast_autocomplete/dwg.py index f1408cd..2a0a674 100644 --- a/fast_autocomplete/dwg.py +++ b/fast_autocomplete/dwg.py @@ -25,7 +25,7 @@ class AutoComplete: CACHE_SIZE = 2048 - def __init__(self, words, synonyms=None): + def __init__(self, words, synonyms=None, full_stop_words=None, logger=None): """ Inistializes the Autocomplete module @@ -39,6 +39,8 @@ def __init__(self, words, synonyms=None): self._lfu_cache = LFUCache(self.CACHE_SIZE) self._clean_synonyms, self._partial_synonyms = self._get_clean_and_partial_synonyms() self._reverse_synonyms = self._get_reverse_synonyms(self._clean_synonyms) + self._full_stop_words = frozenset(full_stop_words) if full_stop_words else None + self.logger = logger self.words = words new_words = self._get_partial_synonyms_to_words() self.words.update(new_words) @@ -206,14 +208,15 @@ def _find(self, word, max_cost, size, call_count=0): fuzzy_min_distance = min_distance = INF matched_prefix_of_last_word, rest_of_word, new_node, matched_words = self._prefix_autofill(word=word) - if matched_words and matched_words[-1] == 'bmw' and not rest_of_word: - print('!!!!!!') - last_word = matched_prefix_of_last_word + rest_of_word if matched_words: results[0] = [matched_words.copy()] min_distance = 0 + # under certain condition with finding full stop words, do not bother with finding more matches + if (self._full_stop_words and matched_words and matched_words[-1] in self._full_stop_words and not matched_prefix_of_last_word): + find_steps = [FindStep.start] + return results, find_steps if len(rest_of_word) < 3: find_steps = [FindStep.descendants_only] self._add_descendants_words_to_results(node=new_node, size=size, matched_words=matched_words, results=results, distance=1) @@ -364,8 +367,8 @@ def _prefix_autofill_part(self, word, node=None, matched_condition_ever=False, m return matched_prefix_of_last_word, rest_of_word, node, matched_words, matched_condition_ever, matched_condition_in_branch - def _add_descendants_words_to_results(self, node, size, matched_words, results, distance): - descendant_words = list(node.get_descendants_words(size)) + def _add_descendants_words_to_results(self, node, size, matched_words, results, distance, go_deep=True): + descendant_words = list(node.get_descendants_words(size, go_deep)) extended = _extend_and_repeat(matched_words, descendant_words) if extended: results[distance].extend(extended) @@ -428,7 +431,7 @@ def value(self): def __repr__(self): return f'< children: {list(self.children.keys())}, word: {self.word} >' - def get_descendants_nodes(self, size): + def get_descendants_nodes(self, size, go_deep=True): que = collections.deque() unique_nodes = {self} @@ -449,11 +452,12 @@ def get_descendants_nodes(self, size): if len(found_words_set) > size: break - for letter, grand_child_node in child_node.children.items(): - if grand_child_node not in unique_nodes: - unique_nodes.add(grand_child_node) - que.append((letter, grand_child_node)) + if go_deep: + for letter, grand_child_node in child_node.children.items(): + if grand_child_node not in unique_nodes: + unique_nodes.add(grand_child_node) + que.append((letter, grand_child_node)) - def get_descendants_words(self, size): - found_words_gen = self.get_descendants_nodes(size) + def get_descendants_words(self, size, go_deep=True): + found_words_gen = self.get_descendants_nodes(size, go_deep) return map(lambda x: x.value, found_words_gen)