feat(orpd-120): implemented cache with cron job #62

Merged: 7 commits, Dec 2, 2024
Changes from all commits
7 changes: 7 additions & 0 deletions Makefile
@@ -124,3 +124,10 @@ isort: # Run isort

secrets-baseline: # Generate a new secrets baseline file
poetry run detect-secrets scan > .secrets.baseline

rebuild_cache_man:
export PYTHONPATH=./fbr && \
export DJANGO_SETTINGS_MODULE='fbr.config.settings.local' && \
export DATABASE_URL=postgres://postgres:postgres@localhost:5432/fbr && \
poetry install && \
poetry run rebuild-cache
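With a local Postgres matching the credentials above, the cache can now be rebuilt manually by running make rebuild_cache_man from the repository root; the target installs dependencies and then invokes the rebuild-cache console script.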
Empty file added fbr/cache/__init__.py
Empty file.
14 changes: 8 additions & 6 deletions fbr/search/legislation.py → fbr/cache/legislation.py
@@ -1,3 +1,7 @@
# isort: skip_file
# fmt: off
# flake8: noqa

import logging
import re
import xml.etree.ElementTree as ET # nosec BXXX
@@ -6,12 +6,10 @@

import requests # type: ignore

from search.config import SearchDocumentConfig
from search.construction_legislation import ( # noqa: E501
construction_legislation_dataframe,
)
from search.utils.date import convert_date_string_to_obj
from search.utils.documents import ( # noqa: E501
from fbr.cache.construction_legislation import construction_legislation_dataframe
from fbr.search.config import SearchDocumentConfig
from fbr.search.utils.date import convert_date_string_to_obj
from fbr.search.utils.documents import ( # noqa: E501
generate_short_uuid,
insert_or_update_document,
)
27 changes: 27 additions & 0 deletions fbr/cache/manage_cache.py
@@ -0,0 +1,27 @@
# flake8: noqa
import os
import time

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")

django.setup()

from fbr.cache.legislation import Legislation
from fbr.cache.public_gateway import PublicGateway
from fbr.search.config import SearchDocumentConfig
from fbr.search.utils.documents import clear_all_documents


def rebuild_cache():
try:
start = time.time()
clear_all_documents()
config = SearchDocumentConfig(search_query="", timeout=20)
Legislation().build_cache(config)
PublicGateway().build_cache(config)
end = time.time()
return {"message": "rebuilt cache", "duration": round(end - start, 2)}
except Exception as e:
return {"message": f"error clearing documents: {e}"}
6 changes: 3 additions & 3 deletions fbr/search/public_gateway.py → fbr/cache/public_gateway.py
@@ -4,8 +4,8 @@

import requests # type: ignore

from search.utils.date import convert_date_string_to_obj
from search.utils.documents import ( # noqa: E501
from fbr.search.utils.date import convert_date_string_to_obj
from fbr.search.utils.documents import ( # noqa: E501
generate_short_uuid,
insert_or_update_document,
)
@@ -86,7 +86,7 @@ def build_cache(self, config):
row["date_valid"] = convert_date_string_to_obj(
row.get("date_valid")
)
row["id"] = (generate_short_uuid(),)
row["id"] = generate_short_uuid()

row["publisher_id"] = (
None
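Note the row["id"] change above: the old line's trailing comma stored the id as a one-element tuple, while the new line stores the plain short UUID.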
22 changes: 22 additions & 0 deletions fbr/cache/tasks.py
@@ -0,0 +1,22 @@
import time

from celery import shared_task

from fbr.cache.legislation import Legislation
from fbr.cache.public_gateway import PublicGateway
from fbr.search.config import SearchDocumentConfig
from fbr.search.utils.documents import clear_all_documents


@shared_task()
def rebuild_cache():
try:
start = time.time()
clear_all_documents()
config = SearchDocumentConfig(search_query="", timeout=20)
Legislation().build_cache(config)
PublicGateway().build_cache(config)
end = time.time()
return {"message": "rebuilt cache", "duration": round(end - start, 2)}
except Exception as e:
return {"message": f"error clearing documents: {e}"}
9 changes: 9 additions & 0 deletions fbr/config/celery.py
@@ -0,0 +1,9 @@
from celery import Celery

app = Celery("fbr_celery")

# Load settings from Django or directly
app.config_from_object("django.conf:settings", namespace="CELERY")

# Auto-discover tasks in installed apps
app.autodiscover_tasks()
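The app defined here still has to be imported when Django starts so that @shared_task binds to it. A minimal sketch of the conventional wiring, assuming it goes in fbr/config/__init__.py (that file is not shown in this PR):

# fbr/config/__init__.py (assumed location) -- expose the Celery app at start-up.
from .celery import app as celery_app

__all__ = ("celery_app",)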
7 changes: 7 additions & 0 deletions fbr/config/settings/base.py
@@ -62,6 +62,7 @@

THIRD_PARTY_APPS: list = [
"webpack_loader",
"django_celery_beat",
]

INSTALLED_APPS = DJANGO_APPS + LOCAL_APPS + THIRD_PARTY_APPS
@@ -262,3 +263,9 @@
GOOGLE_ANALYTICS_TAG_MANAGER_ID = env(
"GOOGLE_ANALYTICS_TAG_MANAGER_ID", default=None
)

# Celery
CELERY_BROKER_URL = "redis://<redis-host>:6379/0" # TODO: actual value
CELERY_ACCEPT_CONTENT = ["json"]
CELERY_TASK_SERIALIZER = "json"
CELERY_RESULT_BACKEND = "redis://<redis-host>:6379/0"
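Until the TODO above is resolved, one option is to read the broker location from the environment instead of hard-coding a host; a sketch using the settings module's existing env helper (the REDIS_URL variable name is an assumption):

# Assumed environment variable; falls back to a local Redis for development.
CELERY_BROKER_URL = env("REDIS_URL", default="redis://localhost:6379/0")
CELERY_RESULT_BACKEND = CELERY_BROKER_URL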
36 changes: 0 additions & 36 deletions fbr/config/urls.py
@@ -1,7 +1,6 @@
"""Find business regulations URL configuration."""

import logging
import time

from rest_framework import routers, serializers, status, viewsets
from rest_framework.decorators import action
@@ -14,9 +13,7 @@
import core.views as core_views
import search.views as search_views

from search.config import SearchDocumentConfig
from search.models import DataResponseModel
from search.utils.documents import clear_all_documents
from search.utils.search import get_publisher_names, search

urls_logger = logging.getLogger(__name__)
@@ -91,38 +88,6 @@ def search(self, request, *args, **kwargs):
)


class RebuildCacheViewSet(viewsets.ViewSet):
@action(detail=False, methods=["post"], url_path="rebuild")
def rebuild_cache(self, request, *args, **kwargs):
from search.legislation import Legislation
from search.public_gateway import PublicGateway

tx_begin = time.time()
try:
clear_all_documents()
config = SearchDocumentConfig(search_query="", timeout=20)
Legislation().build_cache(config)
PublicGateway().build_cache(config)
except Exception as e:
return Response(
data={"message": f"[urls] error clearing documents: {e}"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

tx_end = time.time()
urls_logger.info(
f"time taken to rebuild cache: "
f"{round(tx_end - tx_begin, 2)} seconds"
)
return Response(
data={
"message": "rebuilt cache",
"duration": round(tx_end - tx_begin, 2),
},
status=status.HTTP_200_OK,
)


class PublishersViewSet(viewsets.ViewSet):
@action(detail=False, methods=["get"], url_path="publishers")
def publishers(self, request, *args, **kwargs):
@@ -151,7 +116,6 @@ def publishers(self, request, *args, **kwargs):
# Routers provide an easy way of automatically determining the URL conf.
router = routers.DefaultRouter()
router.register(r"v1", DataResponseViewSet, basename="search")
router.register(r"v1/cache", RebuildCacheViewSet, basename="rebuild")
router.register(r"v1/retrieve", PublishersViewSet, basename="publishers")

urlpatterns = [
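With this removal the v1/cache/rebuild endpoint goes away; cache rebuilds are now driven by the Celery beat schedule or the manual Makefile target instead.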
27 changes: 27 additions & 0 deletions fbr/management/commands/setup_periodic_task.py
@@ -0,0 +1,27 @@
from django_celery_beat.models import CrontabSchedule, PeriodicTask

from django.core.management.base import BaseCommand


class Command(BaseCommand):
help = "Setup periodic task for rebuilding cache"

def handle(self, *args, **kwargs):
# Create or get the crontab schedule
schedule, created = CrontabSchedule.objects.get_or_create(
minute="0", hour="1"
)
# Create the periodic task
task, created = PeriodicTask.objects.get_or_create(
crontab=schedule,
name="Rebuild Cache Daily",
task="fbr.cache.tasks.rebuild_cache",
)
if created:
self.stdout.write(
self.style.SUCCESS("Periodic task created successfully.")
)
else:
self.stdout.write(
self.style.WARNING("Periodic task already exists.")
)
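For reference, the command only needs to run once after the django_celery_beat migrations are applied; a minimal sketch of invoking it programmatically (equivalent to running it through fbr/manage.py setup_periodic_task):

from django.core.management import call_command

# Creates the daily 01:00 "Rebuild Cache Daily" task if it does not already exist.
call_command("setup_periodic_task")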
6 changes: 5 additions & 1 deletion fbr/search/utils/search.py
@@ -68,7 +68,11 @@ def search_database(

# If an id is provided, return the document with that id
if config.id:
return DataResponseModel.objects.filter(id=config.id)
logger.debug(f"searching for document with id: {config.id}")
try:
return DataResponseModel.objects.get(id=config.id)
except DataResponseModel.DoesNotExist:
return DataResponseModel.objects.none()

# Sanitize the query string
query_str = sanitize_input(config.search_query)
3 changes: 1 addition & 2 deletions fbr/search/views.py
@@ -4,7 +4,6 @@
import pandas as pd

from django.conf import settings
from django.core.serializers import serialize
from django.http import HttpRequest, HttpResponse
from django.shortcuts import render
from django.views.decorators.http import require_http_methods
@@ -36,7 +35,7 @@ def document(request: HttpRequest, id) -> HttpResponse:

try:
queryset = search_database(config)
context["result"] = serialize("json", queryset)
context["result"] = queryset
except Exception as e:
logger.error("error fetching details: %s", e)
context["error"] = f"error fetching details: {e}"
20 changes: 20 additions & 0 deletions fbr/setup.py
@@ -0,0 +1,20 @@
from setuptools import find_packages, setup

setup(
name="fbr",
version="0.1",
packages=find_packages(),
install_requires=[
# Add your package dependencies here
"requests",
"pandas",
"django",
"dj_database_url",
],
entry_points={
"console_scripts": [
# Define command-line scripts here if needed
# e.g., 'my-command = fbr.module:function',
],
},
)
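The Makefile target above calls poetry run rebuild-cache, but no console script by that name is declared in this setup.py. A sketch of what such a declaration could look like, assuming the script should point at fbr.cache.manage_cache:rebuild_cache (the real entry point may instead live in pyproject.toml):

from setuptools import find_packages, setup

# Hypothetical wiring for the rebuild-cache script referenced by the Makefile.
setup(
    name="fbr",
    version="0.1",
    packages=find_packages(),
    entry_points={
        "console_scripts": [
            "rebuild-cache = fbr.cache.manage_cache:rebuild_cache",
        ],
    },
)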
3 changes: 0 additions & 3 deletions local_deployment/entry.sh
@@ -11,8 +11,5 @@ npm run build
echo "Collecting Static Files"
python fbr/manage.py collectstatic --noinput

# echo "Check missing migrations"
# python prompt_payments/manage.py makemigrations --check --dry-run

echo "Starting server"
python fbr/manage.py runserver 0.0.0.0:8080