Skip to content

Commit

Permalink
feat: add search functionality and caching
Browse files Browse the repository at this point in the history
Introduced new classes for search config and documents. Implemented search and caching in views, and added new dependencies. Updated HTML and README for improvements.
  • Loading branch information
hareshkainthdbt committed Oct 8, 2024
1 parent df978e5 commit 7ea3c46
Show file tree
Hide file tree
Showing 13 changed files with 541 additions and 10 deletions.
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Create the initial database:
$ make database

> The `make database` command will create a `PostgreSQL` database. If you have
> an existing database and want to start from scratch, use `make drop-databse`
> an existing database and want to start from scratch, use `make drop-database`
> to delete an existing database first.
Prepare the application for first use:
Expand Down Expand Up @@ -98,3 +98,22 @@ With your Poetry shell active:
> This will ensure that your code passes quality checks before you commit it.
> Code quality checks are also performed when pushing your code to origin
> but pre-commit hooks catch issues early and will improve Developer Experience.

### Update database tables

> To update local database tables, you need to set the `DATABASE_URL` environment variable. You can set it in the terminal or in the `.env` file.
<!-- pragma: allowlist secret --> $ export DATABASE_URL=postgres://postgres:postgres@localhost:5432/orp

> If you want to migrate all apps then navigate to /orp/orp and use the following command:
$ poetry run python manage.py migrate

> If you want to migrate a single app then navigate to /orp/orp and use the following command:
$ poetry run python manage.py migrate <app_name>



poetry add boto3 awswrangler
Empty file added __init__.py
Empty file.
17 changes: 17 additions & 0 deletions orp/core/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,20 @@ class RegulationSearchForm(forms.Form):
}
),
)

document_type = forms.MultipleChoiceField(
required=False,
choices=[
("employment-tribunal", "Legislation"),
("MOD", "Guidance"),
("DfT", "Statutory guidance"),
],
widget=forms.CheckboxSelectMultiple(
attrs={
"class": "govuk-checkboxes__input",
"data-module": "govuk-checkboxes",
}
),
label="Select document types",
help_text="You can select multiple document types.",
)
2 changes: 1 addition & 1 deletion orp/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def main():
"""Run administrative tasks."""
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings.local")
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
Expand Down
38 changes: 38 additions & 0 deletions orp/orp_search/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import logging

logger = logging.getLogger(__name__)


class SearchDocumentConfig:
    """Holds the parameters for a public-gateway search request."""

    def __init__(self, search_terms: str, document_types=None, timeout=None):
        """
        Initializes a new instance of the class.

        :param search_terms: A comma-separated string of search terms.
            Blank terms (e.g. from "a,,b" or a trailing comma) are
            discarded so they cannot produce match-everything conditions.
        :param document_types: Optional. A list of document types
            to filter the search.
        :param timeout: Optional. The timeout in seconds for the search
            request; coerced to int so string values are accepted.
        """
        # Split, trim, and drop empty terms: "a, ,b," -> ["a", "b"].
        self.search_terms = [
            term.strip()
            for term in search_terms.split(",")
            if term.strip()
        ]
        self.document_types = document_types
        self.timeout = None if timeout is None else int(timeout)

    def validate(self):
        """
        Validates the presence of search terms.

        Because __init__ discards blank terms, an empty or all-whitespace
        input string yields an empty list here, and is correctly rejected
        (previously "" produced [""] and validated as True).

        Returns
        -------
        bool
            True if at least one search term is present, False otherwise.
        """
        if not self.search_terms:
            # logging is imported at module level in this file.
            logging.getLogger(__name__).error("search terms are required")
            return False
        return True
13 changes: 13 additions & 0 deletions orp/orp_search/management/commands/lean_expired_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from django.core.management.base import BaseCommand

from ...models import PublicGatewayCache


class Command(BaseCommand):
    """Management command that deletes expired PublicGatewayCache rows."""

    # NOTE(review): the file is named lean_expired_cache.py — presumably a
    # typo for clean_expired_cache. The filename IS the command name, so
    # renaming it changes how the command is invoked; confirm before fixing.
    help = "clean up expired cache entries"

    def handle(self, *args, **kwargs):
        # Delegate the actual deletion to the model, then report success.
        PublicGatewayCache.clean_up_expired_entries()
        self.stdout.write(
            self.style.SUCCESS("successfully cleaned up expired cache entries")
        )
31 changes: 31 additions & 0 deletions orp/orp_search/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Generated by Django 4.2.15 on 2024-10-02 14:53

from django.db import migrations, models


class Migration(migrations.Migration):
    # Initial migration for the orp_search app: creates the table backing
    # orp_search.models.PublicGatewayCache.

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="PublicGatewayCache",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # Cache key components: the search terms and type filter.
                ("search_terms", models.CharField(max_length=255)),
                ("document_types", models.JSONField()),
                # Raw response body returned by the public gateway.
                ("response", models.TextField()),
                # Set automatically on insert; used for TTL expiry.
                ("created_at", models.DateTimeField(auto_now_add=True)),
            ],
        ),
    ]
Empty file.
61 changes: 61 additions & 0 deletions orp/orp_search/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import json

from datetime import timedelta

from orp_search.public_gateway import SearchDocumentConfig

from django.db import models
from django.utils import timezone


class PublicGatewayCache(models.Model):
    """Database-backed cache of public-gateway search responses with a TTL."""

    search_terms = models.CharField(max_length=255)
    document_types = models.JSONField()
    response = models.TextField()
    created_at = models.DateTimeField(auto_now_add=True)  # Timestamp for TTL

    TTL = timedelta(days=1)  # Time-To-Live duration for cache entries

    @staticmethod
    def _config_to_key(config: SearchDocumentConfig):
        """
        Derive the (search_terms, document_types) lookup key for a config.

        NOTE(review): config.search_terms is a list; storing it in a
        CharField relies on its str() representation being stable. Confirm
        this is intended, or join the terms into a canonical string.
        """
        return config.search_terms, json.dumps(
            config.document_types, sort_keys=True
        )

    @classmethod
    def get_cached_response(cls, config):
        """
        Return the cached response for config, or None if absent/expired.

        An expired entry is deleted as a side effect of the lookup
        (lazy eviction on read).
        """
        key = cls._config_to_key(config)
        # Keep the try body minimal: only the lookup can raise DoesNotExist.
        try:
            cache_entry = cls.objects.get(
                search_terms=key[0], document_types=key[1]
            )
        except cls.DoesNotExist:
            return None
        if cls.is_expired(cache_entry):
            cache_entry.delete()
            return None
        return cache_entry.response

    @classmethod
    def cache_response(cls, config, response):
        """Store (or refresh) the response for config and return the row."""
        key = cls._config_to_key(config)
        # Refreshing created_at on update restarts the TTL for this key.
        # (The "created" flag from update_or_create is not needed here.)
        cache_entry, _ = cls.objects.update_or_create(
            search_terms=key[0],
            document_types=key[1],
            defaults={"response": response, "created_at": timezone.now()},
        )
        return cache_entry

    @classmethod
    def is_expired(cls, cache_entry):
        """Return True if cache_entry is older than the TTL."""
        return timezone.now() > cache_entry.created_at + cls.TTL

    @classmethod
    def clean_up_expired_entries(cls):
        """Bulk-delete all rows whose TTL has elapsed."""
        cls.objects.filter(created_at__lt=timezone.now() - cls.TTL).delete()
93 changes: 93 additions & 0 deletions orp/orp_search/public_gateway.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import logging

import requests # type: ignore

from jinja2 import Template
from orp_search.config import SearchDocumentConfig

logger = logging.getLogger(__name__)


class PublicGateway:
    def __init__(self):
        """
        Initializes the API client with the base URL for the Trade Data API.

        Attributes:
            base_url (str): The base URL of the Trade Data API.
        """
        self.base_url = "https://data.api.trade.gov.uk"

    def _build_like_conditions(self, field, terms):
        """
        Generates SQL LIKE conditions.

        Single quotes in each term are doubled ('') so a user-supplied
        term cannot break out of the quoted literal and inject SQL —
        S3 Select has no parameterized queries, so escaping is the only
        available defence.

        Args:
            field (str): The database field to apply the LIKE condition to.
            terms (list of str): A list of terms to include in the LIKE
            condition.

        Returns:
            str: A string containing the LIKE conditions combined with 'OR'.
        """
        escaped = [term.replace("'", "''") for term in terms]
        return " OR ".join(f"{field} LIKE '%{t}%'" for t in escaped)

    def search(self, config: "SearchDocumentConfig"):
        """
        Search the market-barriers dataset for the configured terms.

        Builds an S3-Select query matching config.search_terms against
        both the title and summary fields, performs the GET request, and
        returns the raw response body.

        :param config: Search parameters (terms and optional timeout).
        :return: The response text on HTTP 200, otherwise None.
        """
        logger.info("searching for market barriers")
        # Derive the dataset URL from base_url instead of re-hardcoding it.
        # TODO: need to use aws parameter store to store the base url
        url = (
            f"{self.base_url}/v1/datasets/market-barriers"
            "/versions/v1.0.10/data"
        )

        # The same search terms are matched against title and summary.
        # TODO: need to use aws parameter store to store the field names
        title_conditions = self._build_like_conditions(
            "b.title", config.search_terms
        )
        summary_conditions = self._build_like_conditions(
            "b.summary", config.search_terms
        )

        # SQL query to filter based on title and summary containing search
        # terms
        # TODO: we are using example data here, this needs to be updated with
        # the actual table and field names
        query_template = """
        SELECT *
        FROM S3Object[*].barriers[*] b
        WHERE ({{ title_conditions }}) AND ({{ summary_conditions }})
        """

        template = Template(query_template)
        query = template.render(
            title_conditions=title_conditions,
            summary_conditions=summary_conditions,
        )

        # URL encode the query for the API request
        params = {"format": "json", "query-s3-select": query}

        # Log the query with parameters (lazy %-style args).
        logger.info("request will contain the following query: %s", query)
        logger.info(
            "request will contain the following parameters: %s", params
        )

        # Make the GET request
        response = requests.get(url, params=params, timeout=config.timeout)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.text
            logger.info("data fetched successfully: %s", data)
            return data
        else:
            logger.error("data fetch failed: %s", response.text)
            return None
Loading

0 comments on commit 7ea3c46

Please sign in to comment.