Align on facet variable naming convention #39

Open · wants to merge 2 commits into master
129 changes: 67 additions & 62 deletions autocompleter/base.py
@@ -172,25 +172,26 @@ def get_old_norm_terms(cls, obj_id):
return old_terms

@classmethod
def get_old_facets(cls, obj_id):
# Todo: Rename this to get_old_facet_dicts or get_old_facet_data
def get_old_facet_dicts(cls, obj_id):
facet_map_name = FACET_MAP_BASE_NAME % (cls.get_provider_name(),)
old_facets = REDIS.hget(facet_map_name, obj_id)
if old_facets is not None:
old_facets = cls._deserialize_data(old_facets)
return old_facets
old_facet_dicts = REDIS.hget(facet_map_name, obj_id)
if old_facet_dicts is not None:
old_facet_dicts = cls._deserialize_data(old_facet_dicts)
return old_facet_dicts

@classmethod
def clear_facets(cls, obj_id, old_facets):
def clear_facets(cls, obj_id, old_facet_dicts):
"""
For a given object ID, delete old facet data from Redis.
"""
provider_name = cls.get_provider_name()
pipe = REDIS.pipeline()
# Remove old facets from the corresponding facet sorted set containing scores
for facet in old_facets:
for facet_dict in old_facet_dicts:
try:
facet_name = facet["key"]
facet_value = facet["value"]
facet_name = facet_dict["key"]
facet_value = facet_dict["value"]
facet_set_name = FACET_SET_BASE_NAME % (
provider_name,
facet_name,
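[Editor's note: for context, each entry in old_facet_dicts pairs a facet name with the object's stored value, and clear_facets needs both to rebuild the per-value sorted-set key. A minimal sketch with placeholder names and a placeholder key template (neither is taken from this PR):

FACET_SET_BASE_NAME = "djac.%s.f.%s.%s"  # placeholder template; the real one lives in the package settings
provider_name = "stock"                  # hypothetical provider
facet_dict = {"key": "sector", "value": "Tech"}  # hypothetical stored facet dict
facet_set_name = FACET_SET_BASE_NAME % (
    provider_name,
    facet_dict["key"],
    facet_dict["value"],
)
]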
@@ -287,21 +288,21 @@ def store(self, delete_old=True):
norm_terms = self.__class__._get_norm_terms(terms)
score = self._get_score()
data = self.get_data()
facets = self.get_facets()
facet_names = self.get_facets()

# Get all the facet values from the data dict
facet_dicts = []
for facet in facets:
for facet_name in facet_names:
try:
facet_dicts.append({"key": facet, "value": data[facet]})
facet_dicts.append({"key": facet_name, "value": data[facet_name]})
except KeyError:
continue

old_norm_terms = self.__class__.get_old_norm_terms(obj_id)
old_facets = self.__class__.get_old_facets(obj_id)
old_facet_dicts = self.__class__.get_old_facet_dicts(obj_id)

norm_terms_updated = norm_terms != old_norm_terms
facets_updated = facets != old_facets
facets_updated = facet_names != old_facet_dicts
nathanielw44 (Author) commented on Nov 26, 2024:

@anthonyp97 This is the bug you fixed in #38: comparing facet_names with facet dicts. I decided to leave it alone for now because fixing it breaks a ton of tests, so I think it warrants its own PR.
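[Editor's note: a minimal sketch of the mismatch described above, with illustrative values; the like-for-like comparison in the last line is the deferred fix, not part of this PR:

facet_names = ["sector"]                                # strings from get_facets()
old_facet_dicts = [{"key": "sector", "value": "Tech"}]  # dicts from Redis
facets_updated = facet_names != old_facet_dicts         # always True: strings vs. dicts
# A like-for-like comparison would presumably be:
# facets_updated = facet_dicts != old_facet_dicts
]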


# Check if the terms or facets have been updated. If both weren't updated,
# then we can just update the data payload and short circuit.
@@ -317,8 +318,8 @@
# doing 2 extra redis queries.
if norm_terms_updated and old_norm_terms is not None:
self.__class__.clear_keys(obj_id, old_norm_terms)
if facets_updated and old_facets is not None:
self.__class__.clear_facets(obj_id, old_facets)
if facets_updated and old_facet_dicts is not None:
self.__class__.clear_facets(obj_id, old_facet_dicts)

# Start pipeline
pipe = REDIS.pipeline()
@@ -362,11 +363,11 @@ def store(self, delete_old=True):
key = EXACT_SET_BASE_NAME % (provider_name,)
pipe.sadd(key, norm_term)

for facet in facet_dicts:
for facet_dict in facet_dicts:
key = FACET_SET_BASE_NAME % (
provider_name,
facet["key"],
facet["value"],
facet_dict["key"],
facet_dict["value"],
)
pipe.zadd(key, {obj_id: score})

@@ -396,9 +397,9 @@ def remove(self):
terms = self.__class__.get_old_norm_terms(obj_id)
if terms is not None:
self.__class__.clear_keys(obj_id, terms)
facets = self.__class__.get_old_facets(obj_id)
if facets is not None:
self.__class__.clear_facets(obj_id, facets)
facet_dicts = self.__class__.get_old_facet_dicts(obj_id)
if facet_dicts is not None:
self.__class__.clear_facets(obj_id, facet_dicts)


class AutocompleterModelProvider(AutocompleterProviderBase):
@@ -596,14 +597,16 @@ def clear_cache(self):

def suggest(self, term, facets=[]):
"""
Suggest matching objects, given a term
Suggest matching objects, given a term and optional facets
"""
facet_groups = facets

providers = self._get_all_providers_by_autocompleter()
if providers is None:
return []

# If we have a cached version of the search results available, return it!
hashed_facets = self.hash_facets(facets)
hashed_facets = self.hash_facets(facet_groups)
cache_key = CACHE_BASE_NAME % (
self.name,
utils.get_normalized_term(term, settings.JOIN_CHARS),
@@ -637,13 +640,13 @@ def suggest(self, term, facets=[]):
facet_final_exact_match_key,
}

facet_keys_set = set()
if len(facets) > 0:
facet_names_set = set()
if len(facet_groups) > 0:
# we use from_iterable to flatten the groups' facet lists into a single iterable
sub_facets = itertools.chain.from_iterable(
[facet["facets"] for facet in facets]
facet_dicts = itertools.chain.from_iterable(
[facet_group["facets"] for facet_group in facet_groups]
)
facet_keys_set = set([sub_facet["key"] for sub_facet in sub_facets])
facet_names_set = set([facet_dict["key"] for facet_dict in facet_dicts])

MOVE_EXACT_MATCHES_TO_TOP = registry.get_autocompleter_setting(
self.name, "MOVE_EXACT_MATCHES_TO_TOP"
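[Editor's note: a small runnable sketch of the flattening step above, with illustrative facet names and values that are not taken from this PR:

import itertools

facet_groups = [
    {"type": "or", "facets": [{"key": "sector", "value": "Tech"}]},
    {"type": "and", "facets": [{"key": "region", "value": "US"}]},
]
facet_dicts = itertools.chain.from_iterable(
    [facet_group["facets"] for facet_group in facet_groups]
)
facet_names_set = set([facet_dict["key"] for facet_dict in facet_dicts])
assert facet_names_set == {"sector", "region"}
]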
@@ -689,21 +692,21 @@ def suggest(self, term, facets=[]):
pipe.zunionstore(final_result_key, term_result_keys, aggregate="MIN")

use_facets = False
if len(facet_keys_set) > 0:
provider_keys_set = set(provider.get_facets())
if facet_keys_set.issubset(provider_keys_set):
if len(facet_names_set) > 0:
provider_facet_names_set = set(provider.get_facets())
if facet_names_set.issubset(provider_facet_names_set):
use_facets = True

if use_facets:
facet_result_keys = []
for facet in facets:
facet_group_result_keys = []
for facet_group in facet_groups:
try:
facet_type = facet["type"]
facet_type = facet_group["type"]
if facet_type not in ["and", "or"]:
continue
facet_list = facet["facets"]
facet_dict_list = facet_group["facets"]
facet_set_keys = []
for facet_dict in facet_list:
for facet_dict in facet_dict_list:
facet_set_key = FACET_SET_BASE_NAME % (
provider_name,
facet_dict["key"],
@@ -712,10 +715,10 @@
facet_set_keys.append(facet_set_key)

if len(facet_set_keys) == 1:
facet_result_keys.append(facet_set_keys[0])
facet_group_result_keys.append(facet_set_keys[0])
else:
facet_result_key = RESULT_SET_BASE_NAME % str(uuid.uuid4())
facet_result_keys.append(facet_result_key)
facet_group_result_keys.append(facet_result_key)
keys_to_delete.add(facet_result_key)
if facet_type == "and":
pipe.zinterstore(
@@ -728,12 +731,14 @@
except KeyError:
continue

# We want to calculate the intersection of all the intermediate facet sets created so far
# along with the final result set. So we append the final_result_key to the list of
# facet_result_keys and store the intersection in the faceted final result set.
# In order for a result to be valid for the suggest call, it must be a term match
# AND satisfy the conditions of every facet group. To narrow the possible result
# set down to these conditions, we calculate the intersection of:
# - The set of matching results for each facet group (facet_group_result_keys)
# - The current running set of term match results (final_result_key)
pipe.zinterstore(
facet_final_result_key,
facet_result_keys + [final_result_key],
facet_group_result_keys + [final_result_key],
aggregate="MIN",
)
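[Editor's note: a concrete illustration of the set algebra described in the comment above, with assumed facet names and values:

facet_groups = [
    {"type": "or",  "facets": [{"key": "sector", "value": "Tech"},
                               {"key": "sector", "value": "Energy"}]},
    {"type": "and", "facets": [{"key": "region", "value": "US"}]},
]
# The "or" group becomes one ZUNIONSTORE result; the "and" group becomes one
# ZINTERSTORE result (or the bare facet set when it has a single member). The
# final ZINTERSTORE then keeps only IDs that are term matches AND present in
# every group's result.
]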

@@ -770,7 +775,7 @@ def suggest(self, term, facets=[]):
if use_facets:
pipe.zinterstore(
facet_final_exact_match_key,
facet_result_keys + [final_exact_match_key],
facet_group_result_keys + [final_exact_match_key],
aggregate="MIN",
)
pipe.zrange(facet_final_exact_match_key, 0, MAX_RESULTS - 1)
@@ -1031,31 +1036,31 @@ def chunk_list(lst, chunk_size):
yield lst[i : i + chunk_size]

@staticmethod
def hash_facets(facets):
def hash_facets(facet_groups):
"""
Given an array of facet data, return a deterministic hash such that
the ordering of keys inside the facet dicts does not matter.
Given an array of facet group data, return a deterministic hash such that
the ordering of keys inside the groups' facet dicts does not matter.
"""

def sha1_digest(my_str):
return sha1(my_str.encode(encoding="UTF-8")).hexdigest()

facet_hashes = []
for facet in facets:
sub_facet_hashes = []
facet_type = facet["type"]
sub_facets = facet["facets"]
for sub_facet in sub_facets:
sub_facet_str = (
"key:" + sub_facet["key"] + "value:" + str(sub_facet["value"])
facet_group_hashes = []
for facet_group in facet_groups:
facet_dict_hashes = []
facet_group_type = facet_group["type"]
facet_dicts = facet_group["facets"]
for facet_dict in facet_dicts:
facet_dict_str = (
"key:" + facet_dict["key"] + "value:" + str(facet_dict["value"])
)
sub_facet_hashes.append(sha1_digest(sub_facet_str))
sub_facet_hashes.sort()
facet_str = "type:" + facet_type + "facets:" + str(sub_facet_hashes)
facet_hashes.append(sha1_digest(facet_str))
facet_hashes.sort()
final_facet_hash = sha1_digest(str(facet_hashes))
return final_facet_hash
facet_dict_hashes.append(sha1_digest(facet_dict_str))
facet_dict_hashes.sort()
facet_group_str = "type:" + facet_group_type + "facets:" + str(facet_dict_hashes)
facet_group_hashes.append(sha1_digest(facet_group_str))
facet_group_hashes.sort()
final_hash = sha1_digest(str(facet_group_hashes))
return final_hash

@staticmethod
def normalize_rounding(value):
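[Editor's note: a quick determinism check for the renamed hash_facets, with illustrative group data; the import path is assumed from how the views module uses the class. Key order within a facet dict, and the order of dicts within a group, do not change the hash, which keeps cache keys stable across equivalent requests:

from autocompleter import Autocompleter  # import path assumed, not shown in this diff

groups_a = [{"type": "and", "facets": [{"key": "sector", "value": "Tech"},
                                       {"key": "region", "value": "US"}]}]
groups_b = [{"type": "and", "facets": [{"value": "US", "key": "region"},
                                       {"value": "Tech", "key": "sector"}]}]
assert Autocompleter.hash_facets(groups_a) == Autocompleter.hash_facets(groups_b)
]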
26 changes: 13 additions & 13 deletions autocompleter/views.py
@@ -13,11 +13,11 @@ def get(self, request, name):
term = request.GET[settings.SUGGEST_PARAMETER_NAME]
ac = Autocompleter(name)
if settings.FACET_PARAMETER_NAME in request.GET:
facets = request.GET[settings.FACET_PARAMETER_NAME]
facets = json.loads(facets)
if not self.validate_facets(facets):
facet_groups = request.GET[settings.FACET_PARAMETER_NAME]
facet_groups = json.loads(facet_groups)
if not self.validate_facets(facet_groups):
return HttpResponseBadRequest("Malformed facet parameter.")
results = ac.suggest(term, facets=facets)
results = ac.suggest(term, facets=facet_groups)
else:
results = ac.suggest(term)

@@ -26,22 +26,22 @@ def get(self, request, name):
return HttpResponseServerError("Search parameter not found.")

@staticmethod
def validate_facets(facets):
def validate_facets(facet_groups):
"""
Validates the facets list has all the keys we expect as well
Validates the list of facet groups has all the keys we expect as well
as the correct facet types.
"""
for facet in facets:
for facet_group in facet_groups:
try:
facet_type = facet["type"]
facet_type = facet_group["type"]
if facet_type not in ["and", "or"]:
return False
sub_facets = facet["facets"]
if len(sub_facets) == 0:
facet_dicts = facet_group["facets"]
if len(facet_dicts) == 0:
return False
for sub_facet in sub_facets:
sub_facet["key"]
sub_facet["value"]
for facet_dict in facet_dicts:
facet_dict["key"]
facet_dict["value"]
except (KeyError, TypeError):
return False
return True
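[Editor's note: for reference, a payload that passes this validation, with illustrative keys and values; on the wire it arrives JSON-encoded in the facet GET parameter:

facet_groups = [
    {"type": "or", "facets": [{"key": "sector", "value": "Tech"},
                              {"key": "sector", "value": "Energy"}]},
]
# validate_facets(facet_groups) -> True. An empty "facets" list, a missing "key"
# or "value", or a "type" other than "and"/"or" would all return False.
]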