Skip to content

Commit

Permalink
add prototype function to support search typeahead
Browse files Browse the repository at this point in the history
  • Loading branch information
alex75 committed Oct 23, 2024
1 parent 0083d1b commit fe4129a
Showing 1 changed file with 59 additions and 0 deletions.
59 changes: 59 additions & 0 deletions cads_catalogue_api_service/search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,65 @@
WEIGHT_FULLTEXT = 0.03


def _escape_like_pattern(text: str, escape: str = "/") -> str:
    """Escape LIKE/ILIKE wildcards in ``text`` so that it is matched literally.

    Args:
        text: raw input to embed in a LIKE/ILIKE pattern
        escape: escape character; the same value must be passed as ``escape=``
            to the LIKE/ILIKE operator

    Returns:
        ``text`` with the escape character, ``%`` and ``_`` prefixed by ``escape``.
    """
    # the escape character itself must be doubled first, otherwise the escapes
    # added for '%' and '_' would be escaped a second time
    return (
        text.replace(escape, escape * 2)
        .replace("%", escape + "%")
        .replace("_", escape + "_")
    )


def apply_filters_typeahead(
    session: sa.orm.Session,
    chars: str,
    search: sa.orm.Query | None,
    portals: list[str] | None = None,
    limit: int | None = None,
) -> sa.orm.Query:
    """Build a query returning title words that start with the input characters.

    The words are meant to be used as suggestions while typing a dataset search.
    Titles are lowercased and split on single spaces; only distinct words longer
    than 2 characters are returned, in alphabetical order. Hidden resources are
    always excluded.

    Args:
        session: sqlalchemy session object
        chars: initial characters of the words to find (matched literally and
            case-insensitively)
        search: current dataset query; if None, all the resources are considered
        portals: list of datasets portals to consider
        limit: if specified, maximum number of suggestions returned

    Returns:
        A not yet executed query with a single ``suggestion`` column.
    """
    resource = cads_catalogue.database.Resource
    if search is None:
        search = session.query(resource)
    search = search.filter(resource.hidden == False)  # noqa E712
    if portals:
        search = search.filter(resource.portal.in_(portals))

    # one row per (lowercased) word of each title
    word = sa.func.unnest(
        sa.func.string_to_array(sa.func.lower(resource.title), " ")
    ).label("g")
    # these are FROM-clause subqueries, not scalar ones
    t = search.with_entities(word).subquery("t")
    suggestion = sa.func.unnest(sa.func.array_agg(sa.func.distinct(t.c.g))).label(
        "suggestion"
    )
    tt = session.query(suggestion).select_from(t).subquery("tt")

    # '%' and '_' typed by the user must not behave as wildcards
    escape = "/"
    prefix_pattern = _escape_like_pattern(chars, escape) + "%"
    # consider only (resulting words with length > 2) AND (words starting with chars):
    word_filter = sa.and_(
        sa.func.length(tt.c.suggestion) > 2,
        tt.c.suggestion.ilike(prefix_pattern, escape=escape),
    )
    suggestions = (
        session.query(tt.c.suggestion)
        .select_from(tt)
        .filter(word_filter)
        .order_by(tt.c.suggestion)
    )
    if limit is not None:
        suggestions = suggestions.limit(limit)

    # final sql should be something like:
    # SELECT suggestion FROM
    # (
    #   SELECT unnest(array_agg(DISTINCT t.g)) AS suggestion FROM
    #   (
    #     SELECT unnest(string_to_array(lower(title), ' ')) AS g FROM resources
    #     WHERE resources.hidden = false AND resources.portal IN ('cams', 'c3s')
    #   )
    #   as t
    # ) as tt
    # WHERE length(tt.suggestion) > 2 AND tt.suggestion ILIKE 'er%' ESCAPE '/'
    # ORDER BY tt.suggestion LIMIT 10;

    return suggestions


def split_by_category(keywords: list) -> list:
"""Given a list of keywords composed by a "category: value", split them in multiple lists.
Expand Down

0 comments on commit fe4129a

Please sign in to comment.