-
-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
357 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import os | ||
import gzip | ||
import json | ||
import logging | ||
try: | ||
from redis import StrictRedis | ||
except ImportError: | ||
StrictRedis = None | ||
|
||
from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union | ||
from fast_autocomplete import AutoComplete | ||
|
||
|
||
def read_local_dump(filepath: str) -> str:
    """
    Read a local dump file and return its raw text.

    The file is always decoded as UTF-8 rather than with the platform's
    default locale encoding, so the same dump reads identically on every
    machine.

    :param: filepath: Path of the dump file to read.
    :return: The whole content of the file as a string.
    """
    with open(filepath, 'r', encoding='utf-8') as the_file:
        return the_file.read()
|
||
|
||
def _simple_compress(item: Any, hash_to_val: Dict[int, Any]) -> Any:
    """
    De-duplicate equal values so that they share one object in memory.

    The first time a value is seen it is stored in ``hash_to_val`` under its
    hash and returned unchanged. Later calls with an equal value return the
    object stored the first time, so the caller can drop its own copy.

    :param: item: The value to de-duplicate.
    :param: hash_to_val: Pool of already seen values keyed by their hash.
                         It is updated in place.
    :return: The pooled object when an equal value of the same type is already
             in the pool, otherwise ``item`` itself.
    """
    try:
        item_hash = hash(item)
    except TypeError:
        # Unhashable values (lists, dicts, ...) cannot be pooled: keep them as is.
        return item
    pooled = hash_to_val.setdefault(item_hash, item)
    # A matching hash does not prove equality: distinct values can collide
    # (hash(-1) == hash(-2) in CPython) and equal values can differ in type
    # (1 == 1.0 == True). Only hand out the pooled object when it really is
    # the same value, otherwise the caller would get someone else's data.
    if type(pooled) is type(item) and pooled == item:
        return pooled
    return item
|
||
|
||
class WordValue(NamedTuple):
    """
    Value attached to a word, with a small dict-like ``get`` accessor.
    """
    # Context attached to the word (a dict when the value is built by get_data).
    context: Any
    # Display form of the word.
    display: Any
    # Count attached to the word; defaults to 0.
    count: int = 0
    # Another WordValue this one refers back to, if any.
    # NOTE(review): never set in this module - confirm the intended use.
    original_key: Optional['WordValue'] = None

    def get(self, key: str, default: Any = None) -> Any:
        """
        Return the field named ``key`` the way ``dict.get`` would.

        :param: key: Name of the field to read.
        :param: default: Value returned when ``key`` is not one of the fields
                         or when the field holds None.
        :return: The field value, or ``default``.
        """
        # Restrict the lookup to real fields: a bare getattr() raises on an
        # unknown key and would return tuple methods for keys like 'index'.
        if key not in self._fields:
            return default
        result = getattr(self, key)
        return default if result is None else result
|
||
|
||
def get_all_content(content_files, redis_client=None, redis_key_prefix=None, logger=None):
    """
    Get all content that is needed to initialize Autocomplete.

    :param: content_files: Mapping of keyword-argument name to a dict holding
                           the 'filepath' of the data and, optionally, a
                           'compress' flag (treated as False when missing).
    :param: redis_client: (optional) If passed together with redis_key_prefix,
                          it tries to load from Redis if there is already cached data
    :param: redis_key_prefix: (optional) Format string turned into the Redis key
                              of each file.
    :param: logger: (optional) Passed to the loader and, when truthy, also added
                    to the result under the 'logger' key.
    :return: dict with one loaded entry per key of content_files.
    """
    kwargs = {
        key: get_data(
            filepath=info['filepath'],
            # 'compress' is optional, mirroring the default of get_data.
            compress=info.get('compress', False),
            redis_client=redis_client,
            redis_key_prefix=redis_key_prefix,
            logger=logger,
        )
        for key, info in content_files.items()
    }
    if logger:
        kwargs['logger'] = logger
    return kwargs
|
||
|
||
def get_data(filepath: str, compress: bool = False,
             redis_client: Optional['StrictRedis'] = None,
             redis_key_prefix: Optional[str] = None,
             logger: Optional[logging.Logger] = None) -> Dict[str, Any]:
    """
    Load one JSON dump, preferring the Redis copy over the local file.

    :param: filepath: Path of the local JSON dump. Its base name is also used
                      to build the Redis key.
    :param: compress: When True every value of the dump is expected to be a
                      ``[context, display, count]`` triple with a dict context;
                      it is converted into a WordValue whose display and context
                      values are de-duplicated in memory.
    :param: redis_client: (optional) Client whose ``get`` returns the gzipped
                          JSON. Only used together with redis_key_prefix.
    :param: redis_key_prefix: (optional) Format string; ``{}`` is replaced by
                              the base name of filepath to get the Redis key.
    :param: logger: (optional) Used to report Redis failures. Without it the
                    failure is printed.
    :return: The decoded JSON object. Values are WordValue instances when
             compress is True and the raw JSON values otherwise.
    """
    data = None
    if redis_client and redis_key_prefix:
        key = redis_key_prefix.format(os.path.basename(filepath))
        try:
            cached = redis_client.get(key)
            if cached:
                # Decoding stays inside the try block so that a corrupt cache
                # entry falls back to the local file instead of crashing.
                data = json.loads(gzip.decompress(cached).decode('utf-8'))
        except Exception:
            # Redis is only a best-effort cache: report and use the local dump.
            data = None
            if logger:
                logger.exception('Unable to get the search graph words from Redis.')
            else:
                print('Unable to get the search graph words from Redis.')
    if data is None:
        data = json.loads(read_local_dump(filepath))

    if compress:
        hash_to_val = {}
        for word, value in data.items():
            context, display, count = value
            display = _simple_compress(item=display, hash_to_val=hash_to_val)
            # Only values of existing keys are replaced, which is safe while
            # iterating over the dict.
            for key, val in context.items():
                context[key] = _simple_compress(item=val, hash_to_val=hash_to_val)
            data[word] = WordValue(context=context, display=display, count=count)

    return data
|
||
|
||
def populate_redis(content_files, redis_client, redis_cache_prefix):
    """
    Copy every local dump listed in content_files into Redis.

    Each file is stored gzip-compressed (UTF-8 encoded) under the key obtained
    by formatting redis_cache_prefix with the base name of the file.
    """
    for info in content_files.values():
        source_path = info['filepath']
        target_key = redis_cache_prefix.format(os.path.basename(source_path))
        raw_text = read_local_dump(source_path)
        redis_client.set(target_key, gzip.compress(raw_text.encode('utf-8')))
|
||
|
||
def autocomplete_factory(
    content_files, redis_client=None, module=AutoComplete, logger=None,
    redis_key_prefix=None
):
    """
    Factory function to initialize the proper AutoComplete object
    :param: content_files: The file paths and options where data is stored.
    Example
    content_files = {
        'synonyms': {
            'filepath': 'path/to/synonyms.json',
            'compress': False
        },
        'words': {
            'filepath': 'path/to/words.json',
            'compress': True
        },
        'full_stop_words': {
            'filepath': 'path/to/full_stop_words.json',
            'compress': False
        }
    }
    :param: redis_client: (optional) If passed together with redis_key_prefix, the factory
                          function tries to load the data from Redis and if that fails,
                          it will load the local data.
    :param: module: (optional) The AutoComplete module to initialize
    :param: logger: (optional) Logger handed to the loader and to the module.
    :param: redis_key_prefix: (optional) Format string used to build the Redis key of each
                              file, for example 'autocomplete:{}'. The Redis cache is only
                              consulted when both redis_client and this prefix are given.
    """
    # The prefix has to be forwarded: without it the loader never reads Redis.
    kwargs = get_all_content(
        content_files,
        redis_client=redis_client,
        redis_key_prefix=redis_key_prefix,
        logger=logger,
    )
    return module(**kwargs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
{ | ||
"acura rlx": [ | ||
{ | ||
"model": "rlx", | ||
"make": "acura" | ||
}, | ||
"Acura RLX", | ||
3132 | ||
], | ||
"rlx": [ | ||
{ | ||
"model": "rlx", | ||
"make": "acura" | ||
}, | ||
"Acura RLX", | ||
3132 | ||
], | ||
"acura": [ | ||
{ | ||
"make": "acura" | ||
}, | ||
"Acura", | ||
130123 | ||
], | ||
"acura rlx sport hybrid": [ | ||
{ | ||
"model": "rlx sport hybrid", | ||
"make": "acura" | ||
}, | ||
"Acura RLX Sport Hybrid", | ||
4 | ||
], | ||
"rlx sport hybrid": [ | ||
{ | ||
"model": "rlx sport hybrid", | ||
"make": "acura" | ||
}, | ||
"Acura RLX Sport Hybrid", | ||
4 | ||
], | ||
"acura ilx": [ | ||
{ | ||
"model": "ilx--ilx hybrid", | ||
"make": "acura" | ||
}, | ||
"Acura ILX", | ||
19936 | ||
], | ||
"ilx": [ | ||
{ | ||
"model": "ilx--ilx hybrid", | ||
"make": "acura" | ||
}, | ||
"Acura ILX", | ||
19936 | ||
], | ||
"acura mdx": [ | ||
{ | ||
"model": "mdx", | ||
"make": "acura" | ||
}, | ||
"Acura MDX", | ||
35290 | ||
], | ||
"mdx": [ | ||
{ | ||
"model": "mdx", | ||
"make": "acura" | ||
}, | ||
"Acura MDX", | ||
35290 | ||
], | ||
"acura nsx": [ | ||
{ | ||
"model": "nsx", | ||
"make": "acura" | ||
}, | ||
"Acura NSX", | ||
271 | ||
], | ||
"nsx": [ | ||
{ | ||
"model": "nsx", | ||
"make": "acura" | ||
}, | ||
"Acura NSX", | ||
271 | ||
], | ||
"acura rdx": [ | ||
{ | ||
"model": "rdx", | ||
"make": "acura" | ||
}, | ||
"Acura RDX", | ||
33905 | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import os | ||
import pytest | ||
from fast_autocomplete import autocomplete_factory, AutoComplete | ||
|
||
# The fixtures live in a 'fixtures' folder next to this test module.
current_dir = os.path.dirname(os.path.abspath(__file__))
fixture_dir = os.path.join(current_dir, 'fixtures')

# Only the words dump is supplied to the factory in these tests.
content_files = dict(
    words=dict(
        filepath=os.path.join(fixture_dir, 'sample_words.json'),
        compress=True,  # means compress the graph data in memory
    ),
)

# Module-level instance shared by the tests below.
autocomplete = autocomplete_factory(content_files=content_files)
|
||
|
||
class AutoCompleteIgnoreCount(AutoComplete):
    """AutoComplete variant with SHOULD_INCLUDE_COUNT switched off."""

    # NOTE(review): presumably stops the word counts from influencing the
    # order of the results - confirm against AutoComplete.
    SHOULD_INCLUDE_COUNT = False
|
||
|
||
# Same fixture data, loaded through the factory with the count-ignoring class.
autocomplete_ignore_count = autocomplete_factory(
    content_files=content_files,
    module=AutoCompleteIgnoreCount,
)
|
||
|
||
class TestLoader:
    """Searches the factory-built autocomplete objects loaded from the fixture."""

    @pytest.mark.parametrize(
        ('word', 'expected_result', 'expected_unsorted_result'),
        [
            (
                'acu',
                [['acura'], ['acura mdx'], ['acura rdx']],
                [['acura'], ['acura rlx'], ['acura rdx']],
            ),
        ],
    )
    def test_loader(self, word, expected_result, expected_unsorted_result):
        # Default class first, then the variant that ignores the counts.
        default_hits = autocomplete.search(word=word, size=3)
        assert expected_result == default_hits

        ignore_count_hits = autocomplete_ignore_count.search(word=word, size=3)
        assert expected_unsorted_result == ignore_count_hits