diff --git a/fast_autocomplete/__init__.py b/fast_autocomplete/__init__.py index b3d5bae..1ff89d1 100644 --- a/fast_autocomplete/__init__.py +++ b/fast_autocomplete/__init__.py @@ -1,5 +1,5 @@ # flake8: noqa -__version__ = '0.2.3' +__version__ = '0.2.4' import sys pyversion = float(sys.version[:3]) if pyversion < 3.6: @@ -8,3 +8,4 @@ from fast_autocomplete.dwg import AutoComplete from fast_autocomplete.draw import DrawGraphMixin from fast_autocomplete.demo import demo +from fast_autocomplete.normalize import normalize_node_name diff --git a/fast_autocomplete/dwg.py b/fast_autocomplete/dwg.py index 39ed141..f1408cd 100644 --- a/fast_autocomplete/dwg.py +++ b/fast_autocomplete/dwg.py @@ -4,6 +4,7 @@ from collections import defaultdict from fast_autocomplete.lfucache import LFUCache from fast_autocomplete.misc import _extend_and_repeat +from fast_autocomplete.normalize import normalize_node_name from Levenshtein import distance as levenshtein_distance DELIMITER = '__' @@ -174,7 +175,7 @@ def search(self, word, max_cost=2, size=5): - max_cost: Maximum Levenshtein edit distance to be considered when calculating results - size: The max number of results to return """ - word = word.lower().strip() + word = normalize_node_name(word) if not word: return [] key = f'{word}-{max_cost}-{size}' @@ -203,10 +204,11 @@ def _find(self, word, max_cost, size, call_count=0): fuzzy_matches_len = 0 fuzzy_min_distance = min_distance = INF - # if word == 'mercedes s': - # import pytest; pytest.set_trace() matched_prefix_of_last_word, rest_of_word, new_node, matched_words = self._prefix_autofill(word=word) + if matched_words and matched_words[-1] == 'bmw' and not rest_of_word: + print('!!!!!!') + last_word = matched_prefix_of_last_word + rest_of_word if matched_words: diff --git a/fast_autocomplete/normalize.py b/fast_autocomplete/normalize.py new file mode 100644 index 0000000..669d270 --- /dev/null +++ b/fast_autocomplete/normalize.py @@ -0,0 +1,36 @@ +import string +from 
import string
from functools import lru_cache

# Character classes used by the normalizer. Input is lower-cased before it is
# filtered, so only the lowercase ASCII letters need to be listed.
valid_chars_for_string = set(string.ascii_lowercase)
valid_chars_for_integer = set(string.digits)
# Spaces and hyphens are accepted as separators; hyphens are rewritten to
# spaces during normalization.
valid_chars_for_node_name = {' ', '-'} | valid_chars_for_string | valid_chars_for_integer

# Upper bound on the number of memoized normalization results.
NORMALIZED_CACHE_SIZE = 2048
# Names are truncated to this many characters before they are normalized.
MAX_WORD_LENGTH = 40


def normalize_node_name(name):
    """Return the canonical form of *name* used for node names and queries.

    The input is truncated to ``MAX_WORD_LENGTH`` characters and then
    normalized (see ``_get_normalized_node_name``). Results are memoized, and
    truncating first keeps the cache keys bounded in size.

    Args:
        name: The raw text to normalize. ``None`` and ``''`` are accepted.

    Returns:
        The normalized string; ``''`` when *name* is empty or ``None``.
    """
    if not name:
        return ''
    return _get_normalized_node_name(name[:MAX_WORD_LENGTH])


@lru_cache(maxsize=NORMALIZED_CACHE_SIZE)
def _get_normalized_node_name(name):
    """Lower-case *name* and reduce it to letters, digits and single spaces.

    Rules, applied character by character:

    * characters outside ``valid_chars_for_node_name`` are dropped;
    * ``-`` is treated as a space;
    * a space is inserted wherever a letter is followed by a digit or a
      digit is followed by a letter (``'x5'`` becomes ``'x 5'``);
    * runs of spaces collapse to one, and the result is stripped.
    """
    kept = []
    previous = None  # last character appended to ``kept``
    for char in name.lower():
        if char not in valid_chars_for_node_name:
            continue
        if char == '-':
            char = ' '
        elif (
            (char in valid_chars_for_integer and previous in valid_chars_for_string)
            or (char in valid_chars_for_string and previous in valid_chars_for_integer)
        ):
            # Letter/digit boundary: split it into two separate words.
            kept.append(' ')
        if char == ' ' and previous == ' ':
            # Collapse consecutive separators into a single space.
            continue
        kept.append(char)
        # NOTE(review): only kept characters update ``previous``, so a dropped
        # character between a letter and a digit still produces a separating
        # space. Confirm this matches the intended behavior.
        previous = char
    return ''.join(kept).strip()