Skip to content

Commit

Permalink
Merge pull request #38 from uktrade/feature/orpd-54-search-term-query
Browse files Browse the repository at this point in the history
feat: add date parsing utility and improve sorting
  • Loading branch information
hareshkainthdbt authored Oct 30, 2024
2 parents 80c0c05 + baa578b commit 159b199
Showing 1 changed file with 56 additions and 12 deletions.
68 changes: 56 additions & 12 deletions orp/orp_search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import csv
import logging

from datetime import datetime, timezone

import dateutil.parser # type: ignore
import pandas as pd

from orp_search.legislation import Legislation
Expand Down Expand Up @@ -158,6 +161,42 @@ def download_search_csv(request: HttpRequest) -> HttpResponse:
return response


def _parse_date(date_value):
if isinstance(date_value, datetime):
if date_value.tzinfo is None:
# If the datetime is offset-naive, make it offset-aware in UTC
return date_value.replace(tzinfo=timezone.utc)
return date_value
if isinstance(date_value, str):
try:
dt = dateutil.parser.parse(date_value)
if dt.tzinfo is None:
# If parsed datetime is offset-naive,
# make it offset-aware in UTC
return dt.replace(tzinfo=timezone.utc)
return dt
except ValueError:
return None
return None # Return None for invalid date types


def _calculate_score(search_result, search_terms):
"""
Calculate the score of a search result based on the number of
search terms found in the title and description.
:param search_result: A dictionary containing the search result.
:param search_terms: A list of search terms to look for in the
search result.
:return: The score based on the number of search terms found.
"""
title = search_result.get("title", "") or ""
description = search_result.get("description", "") or ""
combined_content = title.lower() + " " + description.lower()
score = sum(combined_content.count(term.lower()) for term in search_terms)
return score


@require_http_methods(["GET"])
def search(request: HttpRequest) -> HttpResponse:
"""Search view.
Expand Down Expand Up @@ -243,20 +282,25 @@ def search(request: HttpRequest) -> HttpResponse:
legislation = Legislation()
search_results += legislation.search(config)

search_results_normalised = []

for result in search_results:
# If result is type of [] then extract each item and append to
# search_dict_results otherwise just append the result to
# search_dict_results
if isinstance(result, list):
for item in result:
search_results_normalised.append(item)
else:
search_results_normalised.append(result)

# Sort results by date_modified (recent) or relevance
# (calculate score and sort by score)
if sort_by == "recent":
search_results = sorted(
search_results,
key=lambda x: _parse_date(x["date_modified"]),
reverse=True,
)
elif sort_by == "relevance":
# Add the 'score' to each search result
for result in search_results:
logger.info("result to pass to calculate score: %s", result)
result["score"] = _calculate_score(result, config.search_terms)

search_results = sorted(
search_results,
key=lambda x: x["score"],
reverse=True,
)

# Paginate results
paginator = Paginator(search_results, config.limit)
Expand Down

0 comments on commit 159b199

Please sign in to comment.