Skip to content

Commit

Permalink
refactor: code for improved date handling and logging
Browse files Browse the repository at this point in the history
Replaced custom date normalization with utility function `convert_date_string_to_obj` across multiple files. Enhanced logging for error handling and added response structuring for search API. Removed redundant migration and adjusted model field types for consistency.
  • Loading branch information
hareshkainthdbt committed Nov 14, 2024
1 parent a4f587e commit b822177
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 269 deletions.
55 changes: 43 additions & 12 deletions orp/config/urls.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""orp URL configuration."""

import logging
import time

import orp_search.views as orp_search_views

from orp_search.config import SearchDocumentConfig
Expand All @@ -16,6 +19,8 @@

import core.views as core_views

urls_logger = logging.getLogger(__name__)


# Serializers define the API representation.
class DataResponseSerializer(serializers.HyperlinkedModelSerializer):
Expand Down Expand Up @@ -56,26 +61,43 @@ class Meta:


class DataResponseViewSet(viewsets.ModelViewSet):
serializer_class = DataResponseSerializer

def list(self, request, *args, **kwargs):
# Assuming `search` is a function that
# processes the request and returns data
@action(detail=False, methods=["get"], url_path="search")
def search(self, request, *args, **kwargs):
context = {
"service_name": settings.SERVICE_NAME_SEARCH,
}
response_data = search(context, request)

# Return the response
return Response(response_data, status=status.HTTP_200_OK)
try:
response_data = search(context, request)

# Create a json object from context but exclude paginator
response_data = {
"results": response_data["results"],
"results_count": response_data["results_count"],
"is_paginated": response_data["is_paginated"],
"results_total_count": response_data["results_total_count"],
"results_page_total": response_data["results_page_total"],
"current_page": response_data["current_page"],
"start_index": response_data["start_index"],
"end_index": response_data["end_index"],
}

# Return the response
return Response(response_data, status=status.HTTP_200_OK)
except Exception as e:
return Response(
data={"message": f"error searching: {e}"},

Check warning

Code scanning / CodeQL

Information exposure through an exception Medium

Stack trace information
flows to this location and may be exposed to an external user.
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)


class RebuildCacheViewSet(viewsets.ViewSet):
@action(detail=False, methods=["post"], url_path="cache")
@action(detail=False, methods=["post"], url_path="rebuildcache")
def rebuild_cache(self, request, *args, **kwargs):
from orp_search.legislation import Legislation
from orp_search.public_gateway import PublicGateway

tx_begin = time.time()
try:
clear_all_documents()
config = SearchDocumentConfig(search_query="", timeout=10)
Expand All @@ -87,15 +109,24 @@ def rebuild_cache(self, request, *args, **kwargs):
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

tx_end = time.time()
urls_logger.info(
f"time taken to rebuild cache: "
f"{round(tx_end - tx_begin, 2)} seconds"
)
return Response(
data={"message": "rebuilt cache"}, status=status.HTTP_200_OK
data={
"message": "rebuilt cache",
"duration": round(tx_end - tx_begin, 2),
},
status=status.HTTP_200_OK,
)


# Routers provide an easy way of automatically determining the URL conf.
router = routers.DefaultRouter()
router.register(r"dataresults", DataResponseViewSet, basename="dataresponse")
router.register(r"rebuild", RebuildCacheViewSet, basename="cache")
router.register(r"v1", DataResponseViewSet, basename="search")
router.register(r"v1", RebuildCacheViewSet, basename="rebuildcache")

urlpatterns = [
path("", include(router.urls)),
Expand Down
52 changes: 5 additions & 47 deletions orp/orp_search/legislation.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import base64
import logging
import re
import xml.etree.ElementTree as ET # nosec BXXX

from datetime import datetime
from typing import Optional

import requests # type: ignore

from numpy.f2py.auxfuncs import throw_error
from orp_search.config import SearchDocumentConfig
from orp_search.construction_legislation import ( # noqa: E501
construction_legislation_dataframe,
)
from orp_search.utils.date import convert_date_string_to_obj
from orp_search.utils.documents import insert_or_update_document

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -44,45 +42,6 @@ def _get_text_from_element(element: Optional[ET.Element]) -> Optional[str]:
return element.text if element is not None else None


def _convert_to_date_string(
date_str: Optional[str], date_format: str = "%Y-%m-%d"
) -> Optional[str]:
"""
Converts a date string to a specified date format.
Supports both "%Y-%m-%d" and "%Y-%m" formats.
Removes double quotes if present in the string.
:param date_str: The date string to convert
(e.g., '2021-03-01', '2014-11').
:return: The formatted date string or None if input is invalid.
"""
if date_str and isinstance(date_str, str):
if date_str is None:
return None

# Remove double quotes and any non-date characters from the string
date_str = re.sub(r"[^\d-]", "", date_str)

# Determine the expected format and adjust the string accordingly
parts = date_str.split("-")
if len(parts) == 1: # Handle "YYYY" case
date_str += "-01-01"
date_format = "%Y-%m-%d"
elif len(parts) == 2: # Handle "YYYY-MM" case
date_str += "-01"
date_format = "%Y-%m-%d"
elif len(parts) == 3: # Handle "YYYY-MM-DD" case
date_format = "%Y-%m-%d"

try:
date_obj = datetime.strptime(date_str, date_format)
return date_obj.strftime(date_format)
except ValueError as e:
logger.error(f"error converting date string: {e}")
return None
return None


class Legislation:
def __init__(self):
# Define the XML namespaces
Expand Down Expand Up @@ -162,8 +121,7 @@ def build_cache(self, config: SearchDocumentConfig):
insert_or_update_document(document_json)
except Exception as e:
logger.error(f"error fetching data from {url}: {e}")
throw_error(f"error fetching data from {url}: {e}")
return
raise e

def _to_json(
self,
Expand All @@ -184,9 +142,9 @@ def _to_json(
"language": language if language is not None else "eng",
"format": format if format is not None else "",
"description": description if description is not None else "",
"date_issued": _convert_to_date_string(modified),
"date_modified": _convert_to_date_string(modified),
"date_valid": _convert_to_date_string(valid),
"date_issued": convert_date_string_to_obj(modified),
"date_modified": convert_date_string_to_obj(modified),
"date_valid": convert_date_string_to_obj(valid),
"type": "Legislation",
"score": 0,
}
102 changes: 0 additions & 102 deletions orp/orp_search/migrations/0001_initial.py

This file was deleted.

Loading

0 comments on commit b822177

Please sign in to comment.