From 8e69e158113c82f59f03c9fe8242781450986bbf Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Wed, 20 Nov 2024 00:31:32 +0000 Subject: [PATCH] chore: normalize document types to lower case and update filtering Ensure document types from user input are normalized to lower case for consistency. Improve the filtering logic in search results to support case-insensitive matches by using more efficient Q objects, and change request parameter processing to handle multiple document types correctly. --- orp/orp_search/config.py | 4 +++- orp/orp_search/utils/search.py | 26 ++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/orp/orp_search/config.py b/orp/orp_search/config.py index 1e769f2..0764177 100644 --- a/orp/orp_search/config.py +++ b/orp/orp_search/config.py @@ -25,7 +25,7 @@ def __init__( request. """ self.search_query = search_query - self.document_types = document_types + self.document_types = [doc_type.lower() for doc_type in document_types] self.timeout = None if timeout is None else int(timeout) self.limit = limit self.offset = offset @@ -33,6 +33,8 @@ def __init__( self.sort_by = sort_by self.id = id + logger.info(f"document_types from request: {self.document_types}") + def validate(self): """ diff --git a/orp/orp_search/utils/search.py b/orp/orp_search/utils/search.py index 0522b9c..fd27695 100644 --- a/orp/orp_search/utils/search.py +++ b/orp/orp_search/utils/search.py @@ -9,7 +9,7 @@ from orp_search.utils.terms import sanitize_input from django.contrib.postgres.search import SearchQuery, SearchVector -from django.db.models import QuerySet +from django.db.models import Q, QuerySet from django.http import HttpRequest logger = logging.getLogger(__name__) @@ -84,24 +84,26 @@ def search_database( # Filter results based on document types if provided queryset = DataResponseModel.objects.annotate(search=vector).filter( search=query_objs, - **( - { - "type__in": [ - doc_type.lower() for doc_type in config.document_types - ] 
- } - if config.document_types - else {} - ), ) + # Filter by document types + if config.document_types: + # Start with an empty Q object + query = Q() + # Loop through the document types and add a Q object for each one + for doc_type in config.document_types: + query |= Q(type__icontains=doc_type.lower()) + + # Filter the queryset using the complex Q object + queryset = queryset.filter(query) + # Filter by publisher if config.publisher_names: queryset = queryset.filter(publisher__in=config.publisher_names) # Sort results based on the sort_by parameter (default) if config.sort_by is None or config.sort_by == "recent": - return queryset.order_by("-date_modified") + return queryset.order_by("-sort_date") if config.sort_by is not None and config.sort_by == "relevance": # Calculate the score for each document @@ -114,7 +116,7 @@ def search(context: dict, request: HttpRequest) -> dict: start_time = time.time() search_query = request.GET.get("query", "") - document_types = request.GET.get("document_type", "").lower().split(",") + document_types = request.GET.getlist("document_type", []) offset = request.GET.get("page", "1") offset = int(offset) if offset.isdigit() else 1 limit = request.GET.get("limit", "10")