Skip to content

Commit

Permalink
add proximity filter to legal search
Browse files Browse the repository at this point in the history
  • Loading branch information
tmpayton committed Dec 19, 2024
1 parent 02b2fbd commit 0a14d87
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 7 deletions.
4 changes: 4 additions & 0 deletions webservices/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,10 @@ def make_seek_args(field=fields.Int, description=None):
'sort': IStr(required=False, description=docs.SORT),
'case_min_penalty_amount': fields.Str(required=False, description=docs.CASE_MIN_PENALTY_AMOUNT),
'case_max_penalty_amount': fields.Str(required=False, description=docs.CASE_MAX_PENALTY_AMOUNT),
'q_proximity': fields.List(fields.Str, description=docs.Q_PROXIMITY),
'max_gaps': fields.Int(required=False, description=docs.MAX_GAPS),
"proximity_filter": fields.Str(validate=validate.OneOf(["after", "before"]), description=docs.PROXIMITY_FILTER),
'proximity_filter_term': fields.Str(required=False, description=docs.PROXIMITY_FILTER_TERM),
}

citation = {
Expand Down
19 changes: 19 additions & 0 deletions webservices/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2354,6 +2354,25 @@ def add_ytd(var):
Show cases with a penalty less than this amount
'''

# Description text for the `q_proximity` legal-search argument
# (a list of strings; see the argument definitions in webservices/args.py).
Q_PROXIMITY = '''
This search identifies documents where the specified phrases appear near each other. The field supports both a single \
phrase or multiple phrases. For a single phrase, the maximum gap is applied between the words in the phrase. For \
multiple phrases, the maximum gap is applied between the phrases themselves.
'''

# Description text for the integer `max_gaps` argument that accompanies `q_proximity`.
MAX_GAPS = '''
The maximum number of positions allowed between matching terms specified in q_proximity
'''

# Description text for the `proximity_filter` argument, which args.py restricts
# to the values "after" or "before".
PROXIMITY_FILTER = '''
Adds additional filters to the proximity search that provides options to specify positional constraints
'''

# Description text for the `proximity_filter_term` argument: the text that the
# `proximity_filter` rule is matched against.
PROXIMITY_FILTER_TERM = '''
q_proximity phrase
'''

# ======== legal end =========


Expand Down
65 changes: 58 additions & 7 deletions webservices/resources/legal.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,19 +180,28 @@ def generic_query_builder(q, type_, from_hit, hits_returned, **kwargs):
.index(SEARCH_ALIAS)
.sort("sort1", "sort2")
)
if type_ == "advisory_opinions":
query = query.highlight("summary", "documents.text", "documents.description")
elif type_ == "statutes":
query = query.highlight("name", "no")
else:
query = query.highlight("documents.text", "documents.description")
proximity_search = False

if kwargs.get("q_proximity") and kwargs.get("max_gaps") and type_ != "statutes":
proximity_search = True

if not proximity_search:
if type_ == "advisory_opinions":
query = query.highlight("summary", "documents.text", "documents.description")
elif type_ == "statutes":
query = query.highlight("name", "no")
else:
query = query.highlight("documents.text", "documents.description")

if kwargs.get("q_exclude"):
must_not = []
must_not.append(Q("nested", path="documents", query=Q("match", documents__text=kwargs.get("q_exclude"))))
query = query.query("bool", must_not=must_not)

# logger.debug("generic_query_builder =" + json.dumps(query.to_dict(), indent=3, cls=DateTimeEncoder))
if proximity_search:
query = get_proximity_query(q, query, **kwargs)

# logging.warning("generic_query_builder =" + json.dumps(query.to_dict(), indent=3, cls=DateTimeEncoder))
return query


Expand Down Expand Up @@ -266,6 +275,48 @@ def case_query_builder(q, type_, from_hit, hits_returned, **kwargs):
else:
return apply_adr_specific_query_params(query, **kwargs)


def get_proximity_query(q, query, **kwargs):
    """Add a nested ``intervals`` proximity clause on ``documents.text`` to ``query``.

    Args:
        q: Unused here; kept so the signature stays compatible with callers.
        query: Search object exposing ``.query(...)`` (presumably an
            elasticsearch_dsl ``Search`` -- confirm against the caller).
        **kwargs: Request arguments. The keys read are:
            ``q_proximity``: list of phrases. One phrase produces a single
                ``match`` rule; several phrases produce an ``all_of`` rule
                whose members are ``match`` rules with ``max_gaps`` of 0.
            ``max_gaps``: gap limit applied to the single ``match`` rule, or
                to the enclosing ``all_of`` rule when several phrases are given.
            ``proximity_filter`` / ``proximity_filter_term``: when both are
                set, a ``filter`` entry is added to the rule, keyed by the
                ``proximity_filter`` value and matching the term.

    Returns:
        The result of ``query.query("bool", must=<nested intervals query>)``,
        or ``query`` unchanged when no proximity phrases were supplied.
    """
    phrases = kwargs.get("q_proximity")
    if not phrases:
        # No phrases means there is no proximity constraint to add; avoid
        # len(None) and an ``all_of`` rule with an empty intervals list.
        return query

    max_gaps = kwargs.get("max_gaps")

    if len(phrases) == 1:
        rule_name = "match"
        rule = {"query": phrases[0], "max_gaps": max_gaps}
    else:
        rule_name = "all_of"
        rule = {
            "max_gaps": max_gaps,
            "intervals": [
                {"match": {"query": phrase, "max_gaps": 0}} for phrase in phrases
            ],
        }

    # The positional filter is only applied when both the rule name and the
    # term to match are present.
    position = kwargs.get("proximity_filter")
    filter_term = kwargs.get("proximity_filter_term")
    if position and filter_term:
        rule["filter"] = {position: {"match": {"query": filter_term}}}

    intervals_query = Q(
        "nested",
        path="documents",
        query=Q("intervals", documents__text={rule_name: rule}),
    )

    return query.query("bool", must=intervals_query)

# Select one or more case_doc_category_id to filter by corresponding case_document_category
# - 1 - Conciliation and Settlement Agreements
# - 2 - Complaint, Responses, Designation of Counsel and Extensions of Time
Expand Down

0 comments on commit 0a14d87

Please sign in to comment.