Skip to content

Commit

Permalink
Merge branch 'main' of github.com:uktrade/find-business-regulations i…
Browse files Browse the repository at this point in the history
…nto ORPD-130-cookie-format
  • Loading branch information
gdbarnes committed Dec 2, 2024
2 parents 692b58e + 370021f commit e7430ba
Show file tree
Hide file tree
Showing 18 changed files with 348 additions and 54 deletions.
15 changes: 15 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,18 @@ isort: # Run isort

secrets-baseline: # Generate a new secrets baseline file
poetry run detect-secrets scan > .secrets.baseline

rebuild_cache: # Rebuild the local search cache (requires local Postgres on 5432)
	export PYTHONPATH=./fbr && \
	export DJANGO_SETTINGS_MODULE='fbr.config.settings.local' && \
	export DATABASE_URL=postgres://postgres:postgres@localhost:5432/fbr && \
	poetry install && \
	poetry run rebuild-cache

# Runs first-use, start and migrate, then populates the cache.
setup_local: # Set up the local environment
	@echo "$(COLOUR_GREEN)Running initial setup for local environment...$(COLOUR_NONE)"
	$(MAKE) first-use
	$(MAKE) start
	$(MAKE) migrate
	$(MAKE) rebuild_cache
	@echo "$(COLOUR_GREEN)Local setup complete.$(COLOUR_NONE)"
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,12 @@ With your Poetry shell active:
> Code quality checks are also performed when pushing your code to origin
> but pre-commit hooks catch issues early and will improve Developer Experience.
### Update database tables
### Setup the service, database and cache locally

$ make setup_local


### Update database tables manually (do not use unless you know what you are doing)

> To update local database tables, you need to set the `DATABASE_URL` environment variable. You can set it in the terminal or in the `.env` file.
Expand Down
Empty file added fbr/cache/__init__.py
Empty file.
File renamed without changes.
14 changes: 8 additions & 6 deletions fbr/search/legislation.py → fbr/cache/legislation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# isort: skip_file
# fmt: off
# flake8: noqa

import logging
import re
import xml.etree.ElementTree as ET # nosec BXXX
Expand All @@ -6,12 +10,10 @@

import requests # type: ignore

from search.config import SearchDocumentConfig
from search.construction_legislation import ( # noqa: E501
construction_legislation_dataframe,
)
from search.utils.date import convert_date_string_to_obj
from search.utils.documents import ( # noqa: E501
from fbr.cache.construction_legislation import construction_legislation_dataframe
from fbr.search.config import SearchDocumentConfig
from fbr.search.utils.date import convert_date_string_to_obj
from fbr.search.utils.documents import ( # noqa: E501
generate_short_uuid,
insert_or_update_document,
)
Expand Down
27 changes: 27 additions & 0 deletions fbr/cache/manage_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# flake8: noqa
# Standalone cache-rebuild entry point (run via `poetry run rebuild-cache`).
import os
import time

import django

# Django must be configured *before* the fbr imports below, because those
# modules touch models/settings at import time — hence the unconventional
# import ordering (which is why flake8 is disabled for this file).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")

django.setup()

from fbr.cache.legislation import Legislation
from fbr.cache.public_gateway import PublicGateway
from fbr.search.config import SearchDocumentConfig
from fbr.search.utils.documents import clear_all_documents


def rebuild_cache():
    """Clear all cached documents and rebuild the cache from source systems.

    Returns:
        dict: On success, ``{"message": "rebuilt cache", "duration": <secs>}``;
        on failure, a dict whose ``message`` describes the error. Errors are
        returned rather than raised so callers always get a status payload.
    """
    try:
        start = time.time()
        clear_all_documents()
        config = SearchDocumentConfig(search_query="", timeout=20)
        Legislation().build_cache(config)
        PublicGateway().build_cache(config)
        end = time.time()
        return {"message": "rebuilt cache", "duration": round(end - start, 2)}
    except Exception as e:
        # The failure may happen while clearing documents OR while building
        # either cache, so report it as a rebuild error, not a clearing error.
        return {"message": f"error rebuilding cache: {e}"}
6 changes: 3 additions & 3 deletions fbr/search/public_gateway.py → fbr/cache/public_gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

import requests # type: ignore

from search.utils.date import convert_date_string_to_obj
from search.utils.documents import ( # noqa: E501
from fbr.search.utils.date import convert_date_string_to_obj
from fbr.search.utils.documents import ( # noqa: E501
generate_short_uuid,
insert_or_update_document,
)
Expand Down Expand Up @@ -86,7 +86,7 @@ def build_cache(self, config):
row["date_valid"] = convert_date_string_to_obj(
row.get("date_valid")
)
row["id"] = (generate_short_uuid(),)
row["id"] = generate_short_uuid()

row["publisher_id"] = (
None
Expand Down
22 changes: 22 additions & 0 deletions fbr/cache/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import time

from celery import shared_task

from fbr.cache.legislation import Legislation
from fbr.cache.public_gateway import PublicGateway
from fbr.search.config import SearchDocumentConfig
from fbr.search.utils.documents import clear_all_documents


@shared_task()
def rebuild_cache():
    """Celery task: clear all cached documents and rebuild the cache.

    Returns:
        dict: On success, ``{"message": "rebuilt cache", "duration": <secs>}``;
        on failure, a dict whose ``message`` describes the error. Errors are
        captured in the task result instead of marking the task failed.
    """
    try:
        start = time.time()
        clear_all_documents()
        config = SearchDocumentConfig(search_query="", timeout=20)
        Legislation().build_cache(config)
        PublicGateway().build_cache(config)
        end = time.time()
        return {"message": "rebuilt cache", "duration": round(end - start, 2)}
    except Exception as e:
        # The failure may happen while clearing documents OR while building
        # either cache, so report it as a rebuild error, not a clearing error.
        return {"message": f"error rebuilding cache: {e}"}
9 changes: 9 additions & 0 deletions fbr/config/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Celery application bootstrap for the fbr project.
from celery import Celery

app = Celery("fbr_celery")

# Load settings from Django or directly
# (only settings prefixed with CELERY_ are read, per namespace="CELERY").
app.config_from_object("django.conf:settings", namespace="CELERY")

# Auto-discover tasks in installed apps
# (finds each app's tasks.py, e.g. fbr.cache.tasks).
app.autodiscover_tasks()
7 changes: 7 additions & 0 deletions fbr/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@

THIRD_PARTY_APPS: list = [
"webpack_loader",
"django_celery_beat",
]

INSTALLED_APPS = DJANGO_APPS + LOCAL_APPS + THIRD_PARTY_APPS
Expand Down Expand Up @@ -262,3 +263,9 @@
GOOGLE_ANALYTICS_TAG_MANAGER_ID = env(
"GOOGLE_ANALYTICS_TAG_MANAGER_ID", default=None
)

# Celery
# Broker/result-backend URLs come from the environment so each deployment
# can point at its own Redis; the default targets a local Redis instance
# (the previous hard-coded "<redis-host>" placeholder was not a usable URL).
CELERY_BROKER_URL = env("CELERY_BROKER_URL", default="redis://localhost:6379/0")
CELERY_ACCEPT_CONTENT = ["json"]
CELERY_TASK_SERIALIZER = "json"
CELERY_RESULT_BACKEND = env(
    "CELERY_RESULT_BACKEND", default="redis://localhost:6379/0"
)
36 changes: 0 additions & 36 deletions fbr/config/urls.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Find business regulations URL configuration."""

import logging
import time

from rest_framework import routers, serializers, status, viewsets
from rest_framework.decorators import action
Expand All @@ -14,9 +13,7 @@
import core.views as core_views
import search.views as search_views

from search.config import SearchDocumentConfig
from search.models import DataResponseModel
from search.utils.documents import clear_all_documents
from search.utils.search import get_publisher_names, search

urls_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -91,38 +88,6 @@ def search(self, request, *args, **kwargs):
)


class RebuildCacheViewSet(viewsets.ViewSet):
@action(detail=False, methods=["post"], url_path="rebuild")
def rebuild_cache(self, request, *args, **kwargs):
from search.legislation import Legislation
from search.public_gateway import PublicGateway

tx_begin = time.time()
try:
clear_all_documents()
config = SearchDocumentConfig(search_query="", timeout=20)
Legislation().build_cache(config)
PublicGateway().build_cache(config)
except Exception as e:
return Response(
data={"message": f"[urls] error clearing documents: {e}"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

tx_end = time.time()
urls_logger.info(
f"time taken to rebuild cache: "
f"{round(tx_end - tx_begin, 2)} seconds"
)
return Response(
data={
"message": "rebuilt cache",
"duration": round(tx_end - tx_begin, 2),
},
status=status.HTTP_200_OK,
)


class PublishersViewSet(viewsets.ViewSet):
@action(detail=False, methods=["get"], url_path="publishers")
def publishers(self, request, *args, **kwargs):
Expand Down Expand Up @@ -151,7 +116,6 @@ def publishers(self, request, *args, **kwargs):
# Routers provide an easy way of automatically determining the URL conf.
router = routers.DefaultRouter()
router.register(r"v1", DataResponseViewSet, basename="search")
router.register(r"v1/cache", RebuildCacheViewSet, basename="rebuild")
router.register(r"v1/retrieve", PublishersViewSet, basename="publishers")

urlpatterns = [
Expand Down
27 changes: 27 additions & 0 deletions fbr/management/commands/setup_periodic_task.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from django_celery_beat.models import CrontabSchedule, PeriodicTask

from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Register the nightly cache-rebuild periodic task.

    Creates (if missing) a crontab schedule firing at 01:00 daily and a
    django-celery-beat PeriodicTask pointing at fbr.cache.tasks.rebuild_cache.
    Safe to run repeatedly: get_or_create makes the command idempotent.
    """

    help = "Setup periodic task for rebuilding cache"

    def handle(self, *args, **kwargs):
        # Create or get the crontab schedule (01:00 daily; other crontab
        # fields default to "*"). The created-flag is irrelevant here.
        schedule, _ = CrontabSchedule.objects.get_or_create(
            minute="0", hour="1"
        )
        # Create the periodic task bound to that schedule; only the
        # created-flag is needed to decide which message to print.
        _, created = PeriodicTask.objects.get_or_create(
            crontab=schedule,
            name="Rebuild Cache Daily",
            task="fbr.cache.tasks.rebuild_cache",
        )
        if created:
            self.stdout.write(
                self.style.SUCCESS("Periodic task created successfully.")
            )
        else:
            self.stdout.write(
                self.style.WARNING("Periodic task already exists.")
            )
6 changes: 5 additions & 1 deletion fbr/search/utils/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@ def search_database(

# If an id is provided, return the document with that id
if config.id:
return DataResponseModel.objects.filter(id=config.id)
logger.debug(f"searching for document with id: {config.id}")
try:
return DataResponseModel.objects.get(id=config.id)
except DataResponseModel.DoesNotExist:
return DataResponseModel.objects.none()

    # Sanitize the query string
query_str = sanitize_input(config.search_query)
Expand Down
3 changes: 1 addition & 2 deletions fbr/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pandas as pd

from django.conf import settings
from django.core.serializers import serialize
from django.http import HttpRequest, HttpResponse
from django.shortcuts import render
from django.views.decorators.http import require_http_methods
Expand Down Expand Up @@ -36,7 +35,7 @@ def document(request: HttpRequest, id) -> HttpResponse:

try:
queryset = search_database(config)
context["result"] = serialize("json", queryset)
context["result"] = queryset
except Exception as e:
logger.error("error fetching details: %s", e)
context["error"] = f"error fetching details: {e}"
Expand Down
20 changes: 20 additions & 0 deletions fbr/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from setuptools import find_packages, setup

# Minimal packaging metadata so the project can be installed in editable
# mode and (eventually) expose console scripts such as the cache rebuilder.
setup(
    name="fbr",
    version="0.1",
    packages=find_packages(),
    install_requires=[
        # Add your package dependencies here
        "requests",
        "pandas",
        "django",
        "dj_database_url",
    ],
    entry_points={
        "console_scripts": [
            # Define command-line scripts here if needed
            # e.g., 'my-command = fbr.module:function',
        ],
    },
)
3 changes: 0 additions & 3 deletions local_deployment/entry.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,5 @@ npm run build
echo "Collecting Static Files"
python fbr/manage.py collectstatic --noinput

# echo "Check missing migrations"
# python prompt_payments/manage.py makemigrations --check --dry-run

echo "Starting server"
python fbr/manage.py runserver 0.0.0.0:8080
Loading

0 comments on commit e7430ba

Please sign in to comment.