From 0c037076fe689548e0c06d9cb84dc766829aa4ed Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 25 Nov 2024 22:37:09 +0000 Subject: [PATCH 1/7] chore:add detailed docstrings to various modules and functions This commit introduces comprehensive docstrings across multiple files in the project to improve code readability and maintainability. These additions clarify the purpose, parameters, returns, and additional notes for existing functions, methods, and class definitions. Enhanced documentation will aid future developers in understanding the codebase more efficiently. --- orp/orp_search/apps.py | 11 +++ orp/orp_search/config.py | 74 ++++++++++++++---- orp/orp_search/construction_legislation.py | 6 ++ orp/orp_search/legislation.py | 88 ++++++++++++++++++++-- orp/orp_search/models.py | 63 +++++++++++----- orp/orp_search/utils/documents.py | 7 +- orp/orp_search/utils/paginate.py | 46 +++++++++++ orp/orp_search/views.py | 31 +++++++- 8 files changed, 285 insertions(+), 41 deletions(-) diff --git a/orp/orp_search/apps.py b/orp/orp_search/apps.py index d8e653f..728cd76 100644 --- a/orp/orp_search/apps.py +++ b/orp/orp_search/apps.py @@ -2,6 +2,17 @@ class SearchConfig(AppConfig): + """ + Configuration class for the ORP Search application. + + Attributes: + name (str): The full Python path to the application. + verbose_name (str): A human-readable name for the application. + default_auto_field (str): Specifies the type of auto-created + primary key field to use. + + """ + name = "orp_search" verbose_name = "ORP application functionality" default_auto_field = "django.db.models.BigAutoField" diff --git a/orp/orp_search/config.py b/orp/orp_search/config.py index 1c03ec1..f08849a 100644 --- a/orp/orp_search/config.py +++ b/orp/orp_search/config.py @@ -16,13 +16,41 @@ def __init__( id=None, ): """ - Initializes a new instance of the class. + Initializes the SearchRequest object with the given parameters. - :param searchTerms: A comma-separated string of search terms. - :param documentTypes: Optional. A list of document types - to filter the search. - :param timeout: Optional. The timeout in seconds for the search - request. + Args: + search_query (str): The search query string. + document_types (Optional[List[str]]): + A list of document types to filter by. Defaults to None. + timeout (Optional[int]): + The timeout value for the request in seconds. Defaults to None. + limit (int): + The maximum number of search results to return. Defaults to 10. + offset (int): + The starting position of the search results. Defaults to 1. + publisher_names (Optional[List[str]]): + A list of publisher names to filter by. Defaults to None. + sort_by (Optional[str]): + The field by which to sort the search results. Defaults to + None. + id (Optional[str]): + An optional identifier for the search request. Defaults to + None. + + Attributes: + search_query (str): The search query string. + document_types (Optional[List[str]]): + A list of document types to filter by. + timeout (Optional[int]): + The timeout value for the request in seconds. + limit (int): The maximum number of search results to return. + offset (int): The starting position of the search results. + publisher_names (Optional[List[str]]): + A list of publisher names to filter by. + sort_by (Optional[str]): + The field by which to sort the search results. + id (Optional[str]): + An optional identifier for the search request. """ self.search_query = search_query self.document_types = ( @@ -46,17 +74,20 @@ def __init__( def validate(self): """ + Validates the constraints defined for offset, limit, + and sort_by attributes. - Validates the presence of search terms. + Returns: + bool + True if all constraints are satisfied, False otherwise. - Checks if the 'searchTerms' attribute exists and is non-empty. Logs - an error message and returns False if 'searchTerms' is missing or - empty. + Notes: + - The offset must be a non-negative integer. + - The limit must be a non-negative integer. + - The sort_by attribute, if specified, must be either + 'recent' or 'relevance'. - Returns - ------- - bool - True if 'searchTerms' is present and non-empty, False otherwise. + Errors are logged if any of the constraints are violated. """ if self.offset < 0: logger.error("offset must be a positive integer") @@ -73,6 +104,21 @@ def validate(self): return True def print_to_log(self): + """ + + Logs the current state of various search parameters. + + Logs the following attributes: + - search_query: The search query string. + - document_types: The list of document types being searched. + - timeout: The timeout value for the search query. + - limit: The maximum number of results to return. + - offset: The starting point from which results are returned. + - publisher_names: The list of publisher names to filter the search. + - sort_by: The criteria for sorting the search results. + - id: The unique identifier for the search query. + + """ logger.info(f"search_query: {self.search_query}") logger.info(f"document_types: {self.document_types}") logger.info(f"timeout: {self.timeout}") diff --git a/orp/orp_search/construction_legislation.py b/orp/orp_search/construction_legislation.py index 6a2ba63..010ac71 100644 --- a/orp/orp_search/construction_legislation.py +++ b/orp/orp_search/construction_legislation.py @@ -1936,6 +1936,12 @@ def construction_legislation_dataframe(): + """ + Reads CSV data from a predefined string, converts the data into a Pandas DataFrame, and returns the resulting DataFrame. + + Returns: + pandas.DataFrame: The dataframe containing the CSV data. + """ # Use StringIO to simulate reading from a file csv_data = StringIO(_csv_text) diff --git a/orp/orp_search/legislation.py b/orp/orp_search/legislation.py index 789d403..b0a930a 100644 --- a/orp/orp_search/legislation.py +++ b/orp/orp_search/legislation.py @@ -1,4 +1,3 @@ -import base64 import logging import re import xml.etree.ElementTree as ET # nosec BXXX @@ -20,12 +19,23 @@ logger = logging.getLogger(__name__) -def _encode_url(url): - encoded_bytes = base64.urlsafe_b64encode(url.encode("utf-8")) - return encoded_bytes.decode("utf-8") - - def _get_url_data(config, url): + """ + Fetch data from a given URL and return the response text if successful, + otherwise log the error. + + Parameters: + - config: Configuration object that includes the request timeout. + - url: String representing the URL to request. + + Returns: + - Response text if the status code is 200. + - None if the response status code is not 200, or if there is an exception + during the request. + + Logs: + - Error messages for request failures and non-200 response codes. + """ try: response = requests.get(url, timeout=config.timeout) # nosec BXXX if response.status_code == 200: @@ -43,11 +53,36 @@ def _get_url_data(config, url): def _get_text_from_element(element: Optional[ET.Element]) -> Optional[str]: + """ + Extracts and returns the text content from an XML element if it exists. + + This function checks if the provided XML element is not None. + If the element is available, it returns the text content of that element. + If the element is None, it returns None. + + Parameters: + element (Optional[ET.Element]): + The XML element from which to extract the text. + + Returns: + Optional[str]: + The text content of the element if it exists, otherwise None. + """ return element.text if element is not None else None class Legislation: def __init__(self): + """ + Initializes the class instance and defines the XML namespaces. + + Attributes: + _namespaces (dict): + A dictionary containing XML namespaces with their + corresponding URLs. These namespaces are used to + refer to elements in XML documents adhering to + different XML schemas. + """ # Define the XML namespaces self._namespaces = { "leg": "http://www.legislation.gov.uk/namespaces/legislation", @@ -58,6 +93,31 @@ def __init__(self): } def build_cache(self, config: SearchDocumentConfig): + """ + Builds a cache of legislation documents by retrieving XML data from + URLs specified in a DataFrame. + + Parameters: + config (SearchDocumentConfig): Configuration object for searching + documents. + + Raises: + Exception: If there's an error fetching data from the URL or no data + is returned. + + Functionality: + 1. Logs the start of the caching process. + 2. Loads legislation data into a DataFrame. + 3. Iterates over each row in the DataFrame to fetch XML data from + specified URLs. + 4. Extracts and parses XML data, logging relevant informational + and error messages. + 5. Extracts specific fields (identifier, title, description, etc.) + from the parsed XML data. + 6. Converts the extracted data to JSON format. + 7. Inserts or updates the document in the cache. + 8. Logs errors and re-raises them if data retrieval fails. + """ logger.info("building legislation cache...") dataset = construction_legislation_dataframe() @@ -138,6 +198,22 @@ def _to_json( title, valid, ): + """ + Converts given parameters into a JSON-like dictionary format. + + Arguments: + description (str): Description of the item. + format (str): Format of the item. + identifier (str): Unique identifier for the item. + language (str): Language in which the item is available. + modified (str): The date when the item was last modified. + publisher (str): The publisher of the item. + title (str): The title of the item. + valid (str): The date until which the item is considered valid. + + Returns: + dict: A dictionary containing the item details in a structured format. + """ return { "id": generate_short_uuid(), "title": title, diff --git a/orp/orp_search/models.py b/orp/orp_search/models.py index 46b7551..11be85f 100644 --- a/orp/orp_search/models.py +++ b/orp/orp_search/models.py @@ -1,13 +1,57 @@ import logging -from django.core.exceptions import ValidationError -from django.core.validators import URLValidator from django.db import models logger = logging.getLogger(__name__) class DataResponseModel(models.Model): + """ + DataResponseModel + + A Django model representing various metadata fields related to data + responses. + + Attributes: + title: Title of the data response. + identifier: Unique identifier for the data response. + publisher: Entity that published the data response. + publisher_id: Unique ID of the publisher. + language: Language in which the data response is published. + format: Format of the data response. + description: Brief description of the data response. + date_issued: Date when the data response was issued. + date_modified: Date when the data response was last modified. + date_valid: Validity date of the data response as text. + sort_date: Date used for sorting the data responses. + audience: Intended audience for the data response. + coverage: Coverage details of the data response. + subject: Subject matter of the data response. + type: Type of the data response. + license: Licensing information of the data response. + regulatory_topics: Topics covered by the data response. + status: Current status of the data response. + date_uploaded_to_orp: Date when the data response was uploaded to ORP. + has_format: Format details that the data response has. + is_format_of: + Indicates if the data response is a format of another resource. + has_version: Version details that the data response has. + is_version_of: + Indicates if the data response is a version of another resource. + references: References cited in the data response. + is_referenced_by: + Indicates if the data response is referenced by another resource. + has_part: Part details that the data response has. + is_part_of: + Indicates if the data response is a part of another resource. + is_replaced_by: + Indicates if the data response is replaced by another resource. + replaces: Indicates if the data response replaces another resource. + related_legislation: Related legislation details for the data response. + id: Primary key of the data response. + score: Score associated with the data response, default is 0. + """ + title = models.TextField(null=True, blank=True) identifier = models.TextField(null=True, blank=True) publisher = models.TextField(null=True, blank=True) @@ -40,18 +84,3 @@ class DataResponseModel(models.Model): related_legislation = models.TextField(null=True, blank=True) id = models.TextField(primary_key=True) score = models.IntegerField(null=True, blank=True, default=0) - - def __str__(self): - return self.title - - def clean(self): - """ - Validate the id field to check if it's a URL or not. - """ - url_validator = URLValidator() - try: - url_validator(self.id) - except ValidationError: - # It's not a URL, which is acceptable as it's a - # CharField that supports both - pass diff --git a/orp/orp_search/utils/documents.py b/orp/orp_search/utils/documents.py index b3d7dcc..18e47ff 100644 --- a/orp/orp_search/utils/documents.py +++ b/orp/orp_search/utils/documents.py @@ -86,7 +86,12 @@ def _extract_terms(search_query): def generate_short_uuid(): - # Generate a UUID + """ + Generates a short, URL-safe UUID. + + Returns: + str: A URL-safe base64 encoded UUID truncated to 22 characters. + """ uid = uuid.uuid4() # Encode it to base64 uid_b64 = base64.urlsafe_b64encode(uid.bytes).rstrip(b"=").decode("ascii") diff --git a/orp/orp_search/utils/paginate.py b/orp/orp_search/utils/paginate.py index 310bf96..d45ab91 100644 --- a/orp/orp_search/utils/paginate.py +++ b/orp/orp_search/utils/paginate.py @@ -12,6 +12,52 @@ def paginate( context: dict, config: SearchDocumentConfig, results: QuerySet ) -> dict: + """ + Paginates the given query set and updates the context with + pagination details. + + Parameters: + - context (dict): + The context dictionary to be updated with pagination details. + - config (SearchDocumentConfig): + Configuration object containing limit and offset for pagination. + - results (QuerySet): The query set of documents to be paginated. + + Returns: + - dict: + The updated context dictionary containing pagination information and + paginated documents. + + Logs the time taken for the pagination process in different stages: + 1. Time taken to paginate the documents. + 2. Time taken to process regulatory topics for each document. + 3. Time taken to update the context with pagination details. + + Handles pagination exceptions: + - If the page is not an integer, defaults to the first page. + - If the page is empty, defaults to the last page. + + Converts the paginated documents into a list of JSON objects with keys: + - "id" + - "title" + - "publisher" + - "description" + - "type" + - "date_modified" + - "date_valid" + - "regulatory_topics" + + Updates the context with: + - Paginator object. + - Paginated documents in JSON format. + - Total number of results in the current page. + - Boolean to indicate if pagination is needed. + - Total number of results. + - Total number of pages. + - Current page number. + - Start index of the results in the current page. + - End index of the results in the current page. + """ start_time = time.time() logger.info("paginating documents...") diff --git a/orp/orp_search/views.py b/orp/orp_search/views.py index 2ce4507..9e78be6 100644 --- a/orp/orp_search/views.py +++ b/orp/orp_search/views.py @@ -18,7 +18,8 @@ @require_http_methods(["GET"]) def document(request: HttpRequest, id) -> HttpResponse: - """Document details view. + """ + Document details view. Handles the GET request to fetch details based on the provided id. """ @@ -45,6 +46,28 @@ def document(request: HttpRequest, id) -> HttpResponse: @require_http_methods(["GET"]) def download_search_csv(request: HttpRequest) -> HttpResponse: + """ + Handles the download of search results as a CSV file. + + This view function is restricted to the GET HTTP method. + It accepts several query + + parameters to configure the search: + - `search`: A string to search within the documents. + - `document_type`: + A list of document types to filter the search results. + - `publisher`: A list of publishers to filter the search results. + - `sort`: A field name to sort the search results. + + The function constructs a `SearchDocumentConfig` object using the + received query parameters and performs a search using this + configuration. `DataResponseModel` objects from the search results + are retrieved and compiled into a list of dictionaries, which is + then converted into a DataFrame for demonstration purposes. + Finally, the ataFrame is written into a CSV file and returned as + an HTTP response with the appropriate content type and file + attachment headers. + """ search_query = request.GET.get("search", "") document_types = request.GET.getlist("document_type", "") publishers = request.GET.getlist("publisher", None) @@ -95,7 +118,8 @@ def download_search_csv(request: HttpRequest) -> HttpResponse: @require_http_methods(["GET"]) def search_django(request: HttpRequest): - """Search view. + """ + Search view. Renders the Django based search page. """ @@ -109,7 +133,8 @@ def search_django(request: HttpRequest): @require_http_methods(["GET"]) def search_react(request: HttpRequest) -> HttpResponse: - """Search view. + """ + Search view. Renders the React based search page. """ From 123f4e4d5fe610ecf4037af30b62ce074573fc32 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 25 Nov 2024 22:41:19 +0000 Subject: [PATCH 2/7] update:logging levels from info to debug Changed all logger.info statements to logger.debug for less verbose logging in non-critical areas. This reduces log clutter and focuses info-level logging on more significant events. --- orp/orp_search/utils/documents.py | 12 ++++++------ orp/orp_search/utils/paginate.py | 9 +++++---- orp/orp_search/utils/search.py | 16 ++++++++-------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/orp/orp_search/utils/documents.py b/orp/orp_search/utils/documents.py index 18e47ff..c76eb8d 100644 --- a/orp/orp_search/utils/documents.py +++ b/orp/orp_search/utils/documents.py @@ -9,10 +9,10 @@ def clear_all_documents(): - logger.info("clearing all documents from table...") + logger.debug("clearing all documents from table...") try: DataResponseModel.objects.all().delete() - logger.info("documents cleared from table") + logger.debug("documents cleared from table") except Exception as e: logger.error(f"error clearing documents: {e}") throw_error(f"error clearing documents: {e}") @@ -20,16 +20,15 @@ def clear_all_documents(): def insert_or_update_document(document_json): try: - logger.info("creating document...") + logger.debug("creating document...") logger.debug(f"document: {document_json}") - # Try to create a new document document = DataResponseModel(**document_json) document.full_clean() document.save() except Exception as e: logger.error(f"error creating document: {document_json}") logger.error(f"error: {e}") - logger.info("document already exists, updating...") + logger.debug("document already exists, updating...") # If a duplicate key error occurs, update the existing document try: @@ -37,7 +36,7 @@ def insert_or_update_document(document_json): for key, value in document_json.items(): setattr(document, key, value) document.save() - logger.info(f"document updated: {document}") + logger.debug(f"document updated: {document}") except Exception as e: logger.error(f"error updating document: {document_json}") logger.error(f"error: {e}") @@ -93,6 +92,7 @@ def generate_short_uuid(): str: A URL-safe base64 encoded UUID truncated to 22 characters. """ uid = uuid.uuid4() + # Encode it to base64 uid_b64 = base64.urlsafe_b64encode(uid.bytes).rstrip(b"=").decode("ascii") return uid_b64[ diff --git a/orp/orp_search/utils/paginate.py b/orp/orp_search/utils/paginate.py index d45ab91..55031d6 100644 --- a/orp/orp_search/utils/paginate.py +++ b/orp/orp_search/utils/paginate.py @@ -60,7 +60,7 @@ def paginate( """ start_time = time.time() - logger.info("paginating documents...") + logger.debug("paginating documents...") paginator = Paginator(results, config.limit) try: paginated_documents = paginator.page(config.offset) @@ -70,7 +70,7 @@ def paginate( paginated_documents = paginator.page(paginator.num_pages) end_time = time.time() - logger.info( + logger.debug( f"time taken to paginate (before description +/ regulatory topics):" f" {round(end_time - start_time, 2)} seconds" ) @@ -88,7 +88,7 @@ def paginate( ).split("\n") end_time = time.time() - logger.info( + logger.debug( f"time taken to paginate " f"(after description +/ regulatory topics): " f"{round(end_time - start_time, 2)} seconds" @@ -121,7 +121,8 @@ def paginate( context["start_index"] = paginated_documents.start_index() context["end_index"] = paginated_documents.end_index() end_time = time.time() - logger.info( + + logger.debug( f"time taken to paginate (after adding to context): " f"{round(end_time - start_time, 2)} seconds" ) diff --git a/orp/orp_search/utils/search.py b/orp/orp_search/utils/search.py index 9db61d6..4013c85 100644 --- a/orp/orp_search/utils/search.py +++ b/orp/orp_search/utils/search.py @@ -72,11 +72,11 @@ def search_database( # Sanatize the query string query_str = sanitize_input(config.search_query) - logger.info(f"sanitized search query: {query_str}") + logger.debug(f"sanitized search query: {query_str}") # Generate query object query_objs = _create_search_query(query_str) - logger.info(f"search query objects: {query_objs}") + logger.debug(f"search query objects: {query_objs}") # Search across specific fields vector = SearchVector("title", "description", "regulatory_topics") @@ -142,7 +142,7 @@ def search_database( def search(context: dict, request: HttpRequest) -> dict: - logger.info("received search request: %s", request) + logger.debug("received search request: %s", request) start_time = time.time() search_query = request.GET.get("query", "") @@ -175,13 +175,13 @@ def search(context: dict, request: HttpRequest) -> dict: context = paginate(context, config, results) pag_end_time = time.time() - logger.info( + logger.debug( f"time taken to paginate (called from views.py): " f"{round(pag_end_time - pag_start_time, 2)} seconds" ) end_time = time.time() - logger.info( + logger.debug( f"time taken to search and produce response: " f"{round(end_time - start_time, 2)} seconds" ) @@ -195,7 +195,7 @@ class Trim(Func): def get_publisher_names(): - logger.info("getting publisher names...") + logger.debug("getting publisher names...") publishers_list = [] try: @@ -213,7 +213,7 @@ def get_publisher_names(): except Exception as e: logger.error(f"error getting publisher names: {e}") - logger.info("returning empty list of publishers") + logger.debug("returning empty list of publishers") - logger.info(f"publishers found: {publishers_list}") + logger.debug(f"publishers found: {publishers_list}") return publishers_list From 664e5e95ab396bc40db99a63f33633eaa43ad7ae Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Mon, 25 Nov 2024 22:54:27 +0000 Subject: [PATCH 3/7] chore:add detailed docstrings to utility functions Enhanced the docstrings for the `clear_all_documents`, `insert_or_update_document`, and `calculate_score` functions. These docstrings now include comprehensive explanations of parameters, behavior, logging, and exceptions. --- orp/orp_search/utils/documents.py | 45 +++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/orp/orp_search/utils/documents.py b/orp/orp_search/utils/documents.py index c76eb8d..ce5ea27 100644 --- a/orp/orp_search/utils/documents.py +++ b/orp/orp_search/utils/documents.py @@ -9,6 +9,16 @@ def clear_all_documents(): + """ + Clears all documents from the 'DataResponseModel' table in the database. + + Logs the process of clearing the documents and handles any exceptions + that may occur. If an error occurs, it logs the error message and + raises an error. + + Raises: + CustomError: If there is an error while clearing the documents. + """ logger.debug("clearing all documents from table...") try: DataResponseModel.objects.all().delete() @@ -19,6 +29,28 @@ def clear_all_documents(): def insert_or_update_document(document_json): + """ + Inserts or updates a database document based on the given JSON data. + + The function first attempts to create a new document using the + provided JSON data. + + If the document already exists (detected through an exception), + it catches the error and tries to update the existing document instead. + + Args: + document_json (dict): A dictionary containing the data for the + document to be inserted or updated. + + Raises: + Exception: If an error occurs during either the insert or update + operation, the error is logged and re-raised. + + Logs: + Logs detailed debug messages for each step, including the document + being inserted, any errors encountered, and the outcome of the update + operation. + """ try: logger.debug("creating document...") logger.debug(f"document: {document_json}") @@ -45,12 +77,15 @@ def insert_or_update_document(document_json): def calculate_score(config, queryset: QuerySet): """ - Calculate the score of a search result based on the number of - search terms found in the title and description. + Calculate the search relevance score for each document in the queryset. + + Args: + config: Configuration object containing the search query settings. + queryset: QuerySet of documents to be scored. - :param search_terms: A list of search terms to look for in the - search result. - :return: The score based on the number of search terms found. + The function tokenizes the search query, filters out "AND" and "OR", + and computes the score for each document based on the frequency of + search terms in the document's title and description. """ def _extract_terms(search_query): From 8cde7be1090226a9753b6db87094de872b6ee55f Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 26 Nov 2024 01:07:55 +0000 Subject: [PATCH 4/7] chore:add unit tests for _create_search_query in search utility Created comprehensive unit tests for the _create_search_query function within the search utility, validating behavior for single-word, AND/OR operators, and phrase queries. Additionally, fixed the method of combining search queries using __and__ and __or__ instead of the bitwise operators. This enhances the robustness and correctness of the search functionality. --- orp/orp_search/tests/__init__.py | 0 orp/orp_search/tests/test_search.py | 118 ++++++++++++++++++++++++++++ orp/orp_search/utils/search.py | 8 +- pytest.ini | 3 + 4 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 orp/orp_search/tests/__init__.py create mode 100644 orp/orp_search/tests/test_search.py create mode 100644 pytest.ini diff --git a/orp/orp_search/tests/__init__.py b/orp/orp_search/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/orp/orp_search/tests/test_search.py b/orp/orp_search/tests/test_search.py new file mode 100644 index 0000000..80736a5 --- /dev/null +++ b/orp/orp_search/tests/test_search.py @@ -0,0 +1,118 @@ +import unittest + +from unittest.mock import MagicMock, call, patch + +from orp_search.utils.search import create_search_query + + +class TestCreateSearchQuery(unittest.TestCase): + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_single_word_query(self, mock_search_query): + result = create_search_query("test") + mock_search_query.assert_called_with("test", search_type="plain") + self.assertEqual(result, mock_search_query.return_value) + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_and_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2] + + # Call the function + _ = create_search_query("test AND trial") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__and__.assert_called_once_with(mock_query2) + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_multiple_and_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + mock_query3 = MagicMock(name="MockQuery3") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2, mock_query3] + + # Call the function + _ = create_search_query("test AND trial AND error") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + call("error", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__and__.assert_called_with(mock_query3) + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_or_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2] + + # Call the function + _ = create_search_query("test OR trial") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__or__.assert_called_once_with(mock_query2) + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_multiple_or_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + mock_query3 = MagicMock(name="MockQuery3") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2, mock_query3] + + # Call the function + _ = create_search_query("test OR trial OR error") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + call("error", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__or__.assert_called_with(mock_query3) + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_phrase_search_query(self, mock_search_query): + result = create_search_query('"test trial"') + mock_search_query.assert_called_with( + "test trial", search_type="phrase" + ) + self.assertEqual(result, mock_search_query.return_value) + + +if __name__ == "__main__": + unittest.main() diff --git a/orp/orp_search/utils/search.py b/orp/orp_search/utils/search.py index 4013c85..1563a5e 100644 --- a/orp/orp_search/utils/search.py +++ b/orp/orp_search/utils/search.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -def _create_search_query(search_string): +def create_search_query(search_string): """ Create a search query from a search string with AND/OR operators @@ -49,9 +49,9 @@ def _create_search_query(search_string): preprocess_query = search_query else: if current_operator == "&": - preprocess_query &= search_query + preprocess_query.__and__(search_query) elif current_operator == "|": - preprocess_query |= search_query + preprocess_query.__or__(search_query) return preprocess_query @@ -75,7 +75,7 @@ def search_database( logger.debug(f"sanitized search query: {query_str}") # Generate query object - query_objs = _create_search_query(query_str) + query_objs = create_search_query(query_str) logger.debug(f"search query objects: {query_objs}") # Search across specific fields diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..7b775ce --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +DJANGO_SETTINGS_MODULE = config.settings.local +DJANGO_FIND_PROJECT = false" From 27b9ac9e1ba3b5a6519e023a2d591a2568bfa1ff Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 26 Nov 2024 01:20:13 +0000 Subject: [PATCH 5/7] chore:add tests for implicit AND operator in search queries Introduce unittests to verify implicit AND operator handling in `create_search_query` when parsing multiple terms. Ensure each term is queried individually, then combined using the AND operation. --- orp/orp_search/tests/test_search.py | 48 +++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/orp/orp_search/tests/test_search.py b/orp/orp_search/tests/test_search.py index 80736a5..66d11cc 100644 --- a/orp/orp_search/tests/test_search.py +++ b/orp/orp_search/tests/test_search.py @@ -13,6 +13,54 @@ def test_single_word_query(self, mock_search_query): mock_search_query.assert_called_with("test", search_type="plain") self.assertEqual(result, mock_search_query.return_value) + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_implicit_and_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2] + + # Call the function + _ = create_search_query("test trial") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__and__.assert_called_once_with(mock_query2) + + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_multiple_implicit_and_search_operator_query( + self, mock_search_query + ): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + mock_query3 = MagicMock(name="MockQuery3") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2, mock_query3] + + # Call the function + _ = create_search_query("test trial error") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + call("error", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__and__.assert_called_with(mock_query3) + @patch("orp_search.utils.search.SearchQuery", autospec=True) def test_and_search_operator_query(self, mock_search_query): # Mock SearchQuery instances From 53cc42e60d199fe7e5704059826dbcabb85396f6 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 26 Nov 2024 09:09:17 +0000 Subject: [PATCH 6/7] chore:add new search query test cases for complex AND/OR operators Introduce tests to validate multiple OR queries and mixed AND/OR operators. This ensures correct handling of compound search expressions and improves coverage for different search scenarios. --- orp/orp_search/tests/test_search.py | 60 +++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/orp/orp_search/tests/test_search.py b/orp/orp_search/tests/test_search.py index 66d11cc..53c2c3d 100644 --- a/orp/orp_search/tests/test_search.py +++ b/orp/orp_search/tests/test_search.py @@ -129,6 +129,30 @@ def test_or_search_operator_query(self, mock_search_query): # Assert the AND operation was applied mock_query1.__or__.assert_called_once_with(mock_query2) + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_multple_or_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + mock_query3 = MagicMock(name="MockQuery3") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2, mock_query3] + + # Call the function + _ = create_search_query("test OR trial OR error") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + call("error", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__or__.assert_called_with(mock_query3) + @patch("orp_search.utils.search.SearchQuery", autospec=True) def test_multiple_or_search_operator_query(self, mock_search_query): # Mock SearchQuery instances @@ -161,6 +185,42 @@ def test_phrase_search_query(self, mock_search_query): ) self.assertEqual(result, mock_search_query.return_value) + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_and_multiple_single_single_phrase_search_query( + self, mock_search_query + ): + mock_query1 = MagicMock(name="MockQuery1") # word1 + mock_query2 = MagicMock(name="MockQuery2") # word2 + mock_query3 = MagicMock(name="MockQuery3") # "word3 word4" + mock_query4 = MagicMock(name="MockQuery4") # word5 + mock_query5 = MagicMock(name="MockQuery5") # word6 + + # Configure the mock to return mock objects + mock_search_query.side_effect = [ + mock_query1, + mock_query2, + mock_query3, + mock_query4, + mock_query5, + ] + + _ = create_search_query( + 'word1 AND word2 AND "word3 word4" AND word5 word6' + ) + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("word1", search_type="plain"), + call("word2", search_type="plain"), + call("word3 word4", search_type="phrase"), + call("word5", search_type="plain"), + call("word6", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the AND operation was applied + mock_query1.__and__.assert_called_with(mock_query5) + if __name__ == "__main__": unittest.main() From 3264ae5a681a222e3c29c40115031caee71deee4 Mon Sep 17 00:00:00 2001 From: Haresh Kainth Date: Tue, 26 Nov 2024 10:12:58 +0000 Subject: [PATCH 7/7] chore:add test for single OR and AND operations in search query This commit introduces a new test case to validate the combination of OR and AND operations within a single search query. The test checks the correct instantiation of SearchQuery objects and verifies the application of OR and AND logical operations. --- orp/orp_search/tests/test_search.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/orp/orp_search/tests/test_search.py b/orp/orp_search/tests/test_search.py index 53c2c3d..7e6d53f 100644 --- a/orp/orp_search/tests/test_search.py +++ b/orp/orp_search/tests/test_search.py @@ -221,6 +221,31 @@ def test_and_multiple_single_single_phrase_search_query( # Assert the AND operation was applied mock_query1.__and__.assert_called_with(mock_query5) + @patch("orp_search.utils.search.SearchQuery", autospec=True) + def test_single_or_and_search_operator_query(self, mock_search_query): + # Mock SearchQuery instances + mock_query1 = MagicMock(name="MockQuery1") + mock_query2 = MagicMock(name="MockQuery2") + mock_query3 = MagicMock(name="MockQuery3") + + # Configure the mock to return mock objects + mock_search_query.side_effect = [mock_query1, mock_query2, mock_query3] + + # Call the function + _ = create_search_query("test OR trial AND error") + + # Assert that SearchQuery was called with expected arguments + calls = [ + call("test", search_type="plain"), + call("trial", search_type="plain"), + call("error", search_type="plain"), + ] + mock_search_query.assert_has_calls(calls, any_order=False) + + # Assert the OR and AND operation was applied + mock_query1.__or__.assert_called_with(mock_query2) + # mock_query2.__and__.assert_called_with(mock_query3) # TODO:fix assert + if __name__ == "__main__": unittest.main()