Skip to content

Commit

Permalink
Merge branch 'main' of github.com:uktrade/orp into feature/ORPD-85-design-tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
gdbarnes committed Nov 19, 2024
2 parents c8fbcc1 + 591b097 commit 5f1491e
Show file tree
Hide file tree
Showing 29 changed files with 3,675 additions and 8,870 deletions.
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[flake8]
per-file-ignores =
construction_legislation.py: E501
5 changes: 5 additions & 0 deletions front_end/stylesheets/helpers.scss
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,8 @@
text-decoration: underline;
}
}

// Bold "marked" text that inherits the surrounding background instead of
// the browser's default highlight colour.
// NOTE(review): presumably used for highlighting matched search terms —
// confirm against the templates that apply this class.
.orp-marked-text {
font-weight: bold;
background-color: inherit;
}
1 change: 0 additions & 1 deletion orp/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@
**dj_database_url.parse(
DATABASE_URL,
engine="postgresql",
conn_max_age=0,
),
"ENGINE": "django.db.backends.postgresql",
}
Expand Down
9 changes: 9 additions & 0 deletions orp/config/settings/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
# Applications that are required to load before DJANGO_APPS
BASE_APPS = [
"whitenoise.runserver_nostatic", # Serve static files via WhiteNoise
"rest_framework",  # Django REST framework — required by the API views
]

# Kept for reference: default DRF permission configuration. Currently
# disabled, so DRF falls back to its own defaults.
# REST_FRAMEWORK = {
# # Use Django's standard `django.contrib.auth` permissions,
# # or allow read-only access for unauthenticated users.
# 'DEFAULT_PERMISSION_CLASSES': [
# 'rest_framework.permissions.DjangoModelPermissionsOrAnonReadOnly',
# ]
# }

# Prepend BASE_APPS so they load before the apps defined in base settings.
INSTALLED_APPS = BASE_APPS + INSTALLED_APPS # noqa
144 changes: 142 additions & 2 deletions orp/config/urls.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,156 @@
"""orp URL configuration."""

import logging
import time

import orp_search.views as orp_search_views

from orp_search.config import SearchDocumentConfig
from orp_search.models import DataResponseModel, logger
from orp_search.utils.documents import clear_all_documents
from orp_search.utils.search import get_publisher_names, search
from rest_framework import routers, serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response

from django.conf import settings
from django.contrib import admin
from django.urls import path
from django.urls import include, path

import core.views as core_views

urls_logger = logging.getLogger(__name__)


# Serializers define the API representation.
class DataResponseSerializer(serializers.HyperlinkedModelSerializer):
    """Serialise ``DataResponseModel`` instances for API responses.

    Exposes the full set of Dublin-Core-style metadata fields stored on
    the model.
    """

    class Meta:
        model = DataResponseModel
        # Fix: "id" was listed twice in the original field list (first and
        # last position); the duplicate entry has been removed.
        fields = [
            "id",
            "title",
            "link",
            "publisher",
            "language",
            "format",
            "description",
            "date_issued",
            "date_modified",
            "date_valid",
            "audience",
            "coverage",
            "subject",
            "type",
            "license",
            "regulatory_topics",
            "status",
            "date_uploaded_to_orp",
            "has_format",
            "is_format_of",
            "has_version",
            "is_version_of",
            "references",
            "is_referenced_by",
            "has_part",
            "is_part_of",
            "is_replaced_by",
            "replaces",
            "related_legislation",
        ]


class DataResponseViewSet(viewsets.ModelViewSet):
    """Search endpoint: GET .../search runs a query and returns JSON."""

    # Keys copied from the search result into the JSON payload; this
    # deliberately excludes the (non-serialisable) paginator object.
    _PAYLOAD_KEYS = (
        "results",
        "results_count",
        "is_paginated",
        "results_total_count",
        "results_page_total",
        "current_page",
        "start_index",
        "end_index",
    )

    @action(detail=False, methods=["get"], url_path="search")
    def search(self, request, *args, **kwargs):
        """Execute a search for ``request`` and return the results.

        Returns HTTP 200 with the serialisable subset of the search
        context, or HTTP 500 with an error message on any failure.
        """
        context = {
            "service_name": settings.SERVICE_NAME_SEARCH,
        }

        try:
            raw = search(context, request)
            payload = {key: raw[key] for key in self._PAYLOAD_KEYS}
            return Response(payload, status=status.HTTP_200_OK)
        except Exception as e:
            # Boundary handler: surface any failure as a 500 JSON body.
            return Response(
                data={"message": f"error searching: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )


class RebuildCacheViewSet(viewsets.ViewSet):
    """Admin endpoint: POST .../rebuild clears and rebuilds the doc cache."""

    @action(detail=False, methods=["post"], url_path="rebuild")
    def rebuild_cache(self, request, *args, **kwargs):
        """Drop all cached documents, then rebuild from both sources.

        Returns HTTP 200 with the elapsed time on success, or HTTP 500
        with the error message on any failure.
        """
        # NOTE(review): imports are deferred to call time — presumably to
        # avoid an import cycle at module load; confirm before hoisting.
        from orp_search.legislation import Legislation
        from orp_search.public_gateway import PublicGateway

        started = time.time()
        try:
            clear_all_documents()
            rebuild_config = SearchDocumentConfig(search_query="", timeout=10)
            for source in (Legislation(), PublicGateway()):
                source.build_cache(rebuild_config)
        except Exception as e:
            return Response(
                data={"message": f"[urls] error clearing documents: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

        elapsed = round(time.time() - started, 2)
        urls_logger.info(f"time taken to rebuild cache: {elapsed} seconds")
        return Response(
            data={
                "message": "rebuilt cache",
                "duration": elapsed,
            },
            status=status.HTTP_200_OK,
        )


class PublishersViewSet(viewsets.ViewSet):
    """Lookup endpoint: GET .../publishers returns known publisher names."""

    @action(detail=False, methods=["get"], url_path="publishers")
    def publishers(self, request, *args, **kwargs):
        """Return every publisher name, or HTTP 500 on failure."""
        try:
            names = get_publisher_names()
            logger.info(f"publishers: {names}")
        except Exception as e:
            return Response(
                data={"message": f"error fetching publishers: {e}"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        return Response(
            data={"results": names},
            status=status.HTTP_200_OK,
        )


# Routers provide an easy way of automatically determining the URL conf.
router = routers.DefaultRouter()
router.register(r"v1", DataResponseViewSet, basename="search")
router.register(r"v1/cache", RebuildCacheViewSet, basename="rebuild")
router.register(r"v1/retrieve", PublishersViewSet, basename="publishers")

urlpatterns = [
path("api/", include(router.urls)),
path("", orp_search_views.search_react, name="search_react"),
path("nojs/", orp_search_views.search, name="search"),
path("nojs/", orp_search_views.search_django, name="search_django"),
# If we choose to have a start page with green button, this is it:
# path("", core_views.home, name="home"),
path(
Expand Down
48 changes: 13 additions & 35 deletions orp/orp_search/config.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
import logging

from orp_search.utils.terms import combine_search_terms, parse_search_terms

logger = logging.getLogger(__name__)


class SearchDocumentConfig:
def __init__(
self,
search_terms: str,
search_query: str,
document_types=None,
timeout=None,
dummy=False,
limit=10,
offset=1,
publisher_terms=None,
publisher_names=None,
sort_by=None,
id=None,
):
Expand All @@ -27,24 +24,15 @@ def __init__(
:param timeout: Optional. The timeout in seconds for the search
request.
"""
self.search_terms = [term.strip() for term in search_terms.split(",")]
self.search_query = search_query
self.document_types = document_types
self.timeout = None if timeout is None else int(timeout)
self.dummy = dummy
self.limit = limit
self.offset = offset
self.publisher_terms = publisher_terms
self.publisher_names = publisher_names
self.sort_by = sort_by
self.id = id

# Parse search terms
search_terms_and, search_terms_or = parse_search_terms(search_terms)
self.search_terms_and = search_terms_and
self.search_terms_or = search_terms_or
self.final_search_expression = combine_search_terms(
search_terms_and, search_terms_or
)

def validate(self):
"""
Expand Down Expand Up @@ -73,22 +61,12 @@ def validate(self):
return False
return True

def build_search_term(self):
# Rules config.search_terms
# 1. If search terms is empty, return empty string
# 2. If search terms begin with a quote and end with a quote
# then treat as a phrase
# 3. If search terms contain a + between two terms then treat
# as an AND search
# 4. If search terms contain a space between two terms then treat
# as a OR search

search_term_tmp = []

for term in self.search_terms:
if term.startswith('"') and term.endswith('"'):
search_term_tmp.append(f'"{term}"')
elif "+" in term:
search_term_tmp.append(term.replace("+", " AND "))
else:
search_term_tmp.append(term)
def print_to_log(self):
    """Log every search-config attribute at INFO level for debugging.

    Uses lazy %-style arguments (instead of the original f-strings) so
    the messages are only formatted when INFO logging is enabled; the
    emitted text is identical.
    """
    logger.info("search_query: %s", self.search_query)
    logger.info("document_types: %s", self.document_types)
    logger.info("timeout: %s", self.timeout)
    logger.info("limit: %s", self.limit)
    logger.info("offset: %s", self.offset)
    logger.info("publisher_names: %s", self.publisher_names)
    logger.info("sort_by: %s", self.sort_by)
    logger.info("id: %s", self.id)
Loading

0 comments on commit 5f1491e

Please sign in to comment.