Skip to content

Commit

Permalink
moving word normalization to this repo
Browse files Browse the repository at this point in the history
  • Loading branch information
seperman committed Jan 30, 2019
1 parent 547b6c3 commit 7f1cedf
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
3 changes: 2 additions & 1 deletion fast_autocomplete/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# flake8: noqa
__version__ = '0.2.3'
__version__ = '0.2.4'
import sys
pyversion = float(sys.version[:3])
if pyversion < 3.6:
Expand All @@ -8,3 +8,4 @@
from fast_autocomplete.dwg import AutoComplete
from fast_autocomplete.draw import DrawGraphMixin
from fast_autocomplete.demo import demo
from fast_autocomplete.normalize import normalize_node_name
8 changes: 5 additions & 3 deletions fast_autocomplete/dwg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections import defaultdict
from fast_autocomplete.lfucache import LFUCache
from fast_autocomplete.misc import _extend_and_repeat
from fast_autocomplete.normalize import normalize_node_name
from Levenshtein import distance as levenshtein_distance

DELIMITER = '__'
Expand Down Expand Up @@ -174,7 +175,7 @@ def search(self, word, max_cost=2, size=5):
- max_cost: Maximum Levenshtein edit distance to be considered when calculating results
- size: The max number of results to return
"""
word = word.lower().strip()
word = normalize_node_name(word)
if not word:
return []
key = f'{word}-{max_cost}-{size}'
Expand Down Expand Up @@ -203,10 +204,11 @@ def _find(self, word, max_cost, size, call_count=0):
fuzzy_matches_len = 0

fuzzy_min_distance = min_distance = INF
# if word == 'mercedes s':
# import pytest; pytest.set_trace()
matched_prefix_of_last_word, rest_of_word, new_node, matched_words = self._prefix_autofill(word=word)

if matched_words and matched_words[-1] == 'bmw' and not rest_of_word:
print('!!!!!!')

last_word = matched_prefix_of_last_word + rest_of_word

if matched_words:
Expand Down
36 changes: 36 additions & 0 deletions fast_autocomplete/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import string
from fast_autocomplete.lfucache import LFUCache

# Characters a normalized node name may contain: lowercase ASCII letters,
# ASCII digits, space, and hyphen (hyphens are converted to spaces later,
# during normalization).
# NOTE: the original built the letter set via {i for i in
# string.ascii_letters.lower()}, which lowercases the full alphabet string
# (duplicating the lowercase run) and then deduplicates in a comprehension;
# set(string.ascii_lowercase) yields the identical set directly.
valid_chars_for_string = set(string.ascii_lowercase)
valid_chars_for_integer = set(string.digits)
valid_chars_for_node_name = {' ', '-'} | valid_chars_for_string | valid_chars_for_integer

# Maximum number of normalized names memoized by the LFU cache.
NORMALIZED_CACHE_SIZE = 2048
# Names are truncated to this many characters before normalization.
MAX_WORD_LENGTH = 40

# Module-level LFU cache memoizing normalization results; bounded by
# NORMALIZED_CACHE_SIZE so memory use stays constant under heavy lookups.
_normalized_lfu_cache = LFUCache(NORMALIZED_CACHE_SIZE)


def normalize_node_name(name):
    """Return the normalized form of *name*, memoized in the module LFU cache.

    The input is first truncated to MAX_WORD_LENGTH characters; that
    truncated string serves as the cache key, so two long names that share
    the same prefix normalize identically.
    """
    key = name[:MAX_WORD_LENGTH]
    normalized = _normalized_lfu_cache.get(key)
    # LFUCache.get signals a miss with -1 (results are strings, never -1).
    if normalized == -1:
        normalized = _get_normalized_node_name(key)
        _normalized_lfu_cache.set(key, normalized)
    return normalized


def _get_normalized_node_name(name):
name = name.lower()
result = []
last_i = None
for i in name:
if i in valid_chars_for_node_name:
if i == '-':
i = ' '
elif (i in valid_chars_for_integer and last_i in valid_chars_for_string) or (i in valid_chars_for_string and last_i in valid_chars_for_integer):
result.append(' ')
if not(i == last_i == ' '):
result.append(i)
last_i = i
return ''.join(result).strip()

0 comments on commit 7f1cedf

Please sign in to comment.