dealing with full stop words
seperman committed Jan 30, 2019
1 parent 7f1cedf commit c6b174a
Showing 2 changed files with 18 additions and 14 deletions.
2 changes: 1 addition & 1 deletion fast_autocomplete/__init__.py
@@ -1,5 +1,5 @@
 # flake8: noqa
-__version__ = '0.2.4'
+__version__ = '0.2.5'
 import sys
 pyversion = float(sys.version[:3])
 if pyversion < 3.6:
30 changes: 17 additions & 13 deletions fast_autocomplete/dwg.py
@@ -25,7 +25,7 @@ class AutoComplete:

     CACHE_SIZE = 2048

-    def __init__(self, words, synonyms=None):
+    def __init__(self, words, synonyms=None, full_stop_words=None, logger=None):
         """
         Initializes the Autocomplete module
@@ -39,6 +39,8 @@ def __init__(self, words, synonyms=None):
         self._lfu_cache = LFUCache(self.CACHE_SIZE)
         self._clean_synonyms, self._partial_synonyms = self._get_clean_and_partial_synonyms()
         self._reverse_synonyms = self._get_reverse_synonyms(self._clean_synonyms)
+        self._full_stop_words = frozenset(full_stop_words) if full_stop_words else None
+        self.logger = logger
         self.words = words
         new_words = self._get_partial_synonyms_to_words()
         self.words.update(new_words)
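
A minimal sketch of how the two new parameters might be used, following the library's existing constructor and search API; the sample words and the choice of 'bmw' as a full stop word are illustrative, not taken from the commit:

from fast_autocomplete import AutoComplete

# words maps each searchable phrase to a context dict, per the library's API.
words = {'bmw': {}, 'bmw x5': {}, 'beemer': {}}

# Phrases passed in full_stop_words are frozen into self._full_stop_words;
# once one of them is fully matched, _find can return early (see below).
autocomplete = AutoComplete(words=words, full_stop_words=['bmw'])
autocomplete.search(word='bmw', max_cost=3, size=3)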
@@ -206,14 +208,15 @@ def _find(self, word, max_cost, size, call_count=0):
         fuzzy_min_distance = min_distance = INF
         matched_prefix_of_last_word, rest_of_word, new_node, matched_words = self._prefix_autofill(word=word)

-        if matched_words and matched_words[-1] == 'bmw' and not rest_of_word:
-            print('!!!!!!')
-
         last_word = matched_prefix_of_last_word + rest_of_word

         if matched_words:
             results[0] = [matched_words.copy()]
             min_distance = 0
+        # under certain conditions with full stop words, do not bother finding more matches
+        if (self._full_stop_words and matched_words and matched_words[-1] in self._full_stop_words and not matched_prefix_of_last_word):
+            find_steps = [FindStep.start]
+            return results, find_steps
         if len(rest_of_word) < 3:
             find_steps = [FindStep.descendants_only]
             self._add_descendants_words_to_results(node=new_node, size=size, matched_words=matched_words, results=results, distance=1)
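
The new guard can be read in isolation as follows; this helper is hypothetical (the commit inlines the condition in _find), with names mirroring the diff:

def _should_stop_early(self, matched_words, matched_prefix_of_last_word):
    # Stop as soon as the most recently matched word is a designated full
    # stop word and no partial prefix of a further word remains unmatched.
    return bool(
        self._full_stop_words
        and matched_words
        and matched_words[-1] in self._full_stop_words
        and not matched_prefix_of_last_word
    )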
@@ -364,8 +367,8 @@ def _prefix_autofill_part(self, word, node=None, matched_condition_ever=False, m

         return matched_prefix_of_last_word, rest_of_word, node, matched_words, matched_condition_ever, matched_condition_in_branch

-    def _add_descendants_words_to_results(self, node, size, matched_words, results, distance):
-        descendant_words = list(node.get_descendants_words(size))
+    def _add_descendants_words_to_results(self, node, size, matched_words, results, distance, go_deep=True):
+        descendant_words = list(node.get_descendants_words(size, go_deep))
         extended = _extend_and_repeat(matched_words, descendant_words)
         if extended:
             results[distance].extend(extended)
@@ -428,7 +431,7 @@ def value(self):
     def __repr__(self):
         return f'< children: {list(self.children.keys())}, word: {self.word} >'

-    def get_descendants_nodes(self, size):
+    def get_descendants_nodes(self, size, go_deep=True):

         que = collections.deque()
         unique_nodes = {self}
@@ -449,11 +452,12 @@ def get_descendants_nodes(self, size):
                     if len(found_words_set) > size:
                         break

-            for letter, grand_child_node in child_node.children.items():
-                if grand_child_node not in unique_nodes:
-                    unique_nodes.add(grand_child_node)
-                    que.append((letter, grand_child_node))
+            if go_deep:
+                for letter, grand_child_node in child_node.children.items():
+                    if grand_child_node not in unique_nodes:
+                        unique_nodes.add(grand_child_node)
+                        que.append((letter, grand_child_node))

-    def get_descendants_words(self, size):
-        found_words_gen = self.get_descendants_nodes(size)
+    def get_descendants_words(self, size, go_deep=True):
+        found_words_gen = self.get_descendants_nodes(size, go_deep)
         return map(lambda x: x.value, found_words_gen)
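
A short sketch of what go_deep changes, assuming (from the visible BFS) that the queue is seeded with the node's immediate children; node here stands for any hypothetical dawg node:

# With go_deep=True (the default) the BFS keeps enqueuing grandchildren, so
# words from the node's entire subtree are yielded. With go_deep=False no new
# nodes are enqueued, so only words reachable in the first level of the
# traversal are collected -- a cheaper, shallower expansion.
shallow = list(node.get_descendants_words(10, go_deep=False))
full = list(node.get_descendants_words(10))  # go_deep defaults to True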
