Skip to content

Commit 7ea5b37

Browse files
committed
WIP ✨(backend) fixup & improve tests
Signed-off-by: Fabre Florian <[email protected]>
1 parent 2d19a4a commit 7ea5b37

File tree

9 files changed

+134
-150
lines changed

9 files changed

+134
-150
lines changed

src/backend/core/api/viewsets.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
YdocConverter,
5151
)
5252
from core.services.search_indexers import (
53-
default_document_indexer,
53+
get_document_indexer,
5454
get_visited_document_ids_of,
5555
)
5656
from core.tasks.mail import send_ask_for_access_mail
@@ -1042,21 +1042,30 @@ def duplicate(self, request, *args, **kwargs):
10421042
def search(self, request, *args, **kwargs):
10431043
"""
10441044
Returns a DRF response containing the filtered, annotated and ordered document list.
1045-
The filtering allows full text search through the opensearch indexation app "find".
1045+
1046+
Applies filtering based on request parameter 'q' from `FindDocumentSerializer`.
1047+
Depending of the configuration it can be:
1048+
- A fulltext search through the opensearch indexation app "find" if the backend is
1049+
enabled (see SEARCH_BACKEND_CLASS)
1050+
- A filtering by the model field 'title'.
1051+
1052+
The ordering is always by the most recent first.
10461053
"""
10471054
access_token = request.session.get("oidc_access_token")
10481055
user = request.user
10491056

10501057
serializer = serializers.FindDocumentSerializer(data=request.query_params)
10511058
serializer.is_valid(raise_exception=True)
10521059

1053-
indexer = default_document_indexer()
1060+
indexer = get_document_indexer()
1061+
text = serializer.validated_data["q"]
10541062

1063+
# The indexer is not configured, so we fallback on a simple filter on the
1064+
# model field 'title'.
10551065
if not indexer:
1066+
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
10561067
queryset = self.get_queryset()
1057-
filterset = DocumentFilter(
1058-
{"title": serializer.validated_data.get("q", "")}, queryset=queryset
1059-
)
1068+
filterset = DocumentFilter({"title": text}, queryset=queryset)
10601069

10611070
if not filterset.is_valid():
10621071
raise drf.exceptions.ValidationError(filterset.errors)
@@ -1071,15 +1080,17 @@ def search(self, request, *args, **kwargs):
10711080
)
10721081

10731082
queryset = models.Document.objects.all()
1083+
1084+
# Retrieve the documents ids from Find.
10741085
results = indexer.search(
1075-
text=serializer.validated_data.get("q", ""),
1086+
text=text,
10761087
token=access_token,
10771088
visited=get_visited_document_ids_of(queryset, user),
10781089
page=serializer.validated_data.get("page", 1),
10791090
page_size=serializer.validated_data.get("page_size", 20),
10801091
)
10811092

1082-
queryset = queryset.filter(pk__in=results)
1093+
queryset = queryset.filter(pk__in=results).order_by("-updated_at")
10831094

10841095
return self.get_response_for_queryset(
10851096
queryset,

src/backend/core/services/search_indexers.py

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -19,37 +19,24 @@
1919

2020

2121
@cache
22-
def default_document_indexer():
23-
"""Returns default indexer service is enabled and properly configured."""
22+
def get_document_indexer():
23+
"""Returns an instance of indexer service if enabled and properly configured."""
24+
classpath = settings.SEARCH_INDEXER_CLASS
2425

2526
# For this usecase an empty indexer class is not an issue but a feature.
26-
if not getattr(settings, "SEARCH_INDEXER_CLASS", None):
27+
if not classpath:
2728
logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)")
2829
return None
2930

3031
try:
31-
return get_document_indexer_class()()
32+
indexer_class = import_string(settings.SEARCH_INDEXER_CLASS)
33+
return indexer_class()
34+
except ImportError as err:
35+
logger.error("SEARCH_INDEXER_CLASS setting is not valid : %s", err)
3236
except ImproperlyConfigured as err:
3337
logger.error("Document indexer is not properly configured : %s", err)
34-
return None
3538

36-
37-
@cache
38-
def get_document_indexer_class():
39-
"""Return the indexer backend class based on the settings."""
40-
classpath = settings.SEARCH_INDEXER_CLASS
41-
42-
if not classpath:
43-
raise ImproperlyConfigured(
44-
"SEARCH_INDEXER_CLASS must be set in Django settings."
45-
)
46-
47-
try:
48-
return import_string(settings.SEARCH_INDEXER_CLASS)
49-
except ImportError as err:
50-
raise ImproperlyConfigured(
51-
f"SEARCH_INDEXER_CLASS setting is not valid : {err}"
52-
) from err
39+
return None
5340

5441

5542
def get_batch_accesses_by_users_and_teams(paths):
@@ -100,9 +87,11 @@ def get_visited_document_ids_of(queryset, user):
10087
ancestors_deleted_at__isnull=True,
10188
)
10289
.filter(pk__in=Subquery(qs.values("document_id")))
90+
.order_by("pk")
91+
.distinct("pk")
10392
)
10493

105-
return list({str(id) for id in docs.values_list("pk", flat=True)})
94+
return [str(id) for id in docs.values_list("pk", flat=True)]
10695

10796

10897
class BaseDocumentIndexer(ABC):

src/backend/core/signals.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from django.dispatch import receiver
1010

1111
from . import models
12-
from .services.search_indexers import default_document_indexer
1312
from .tasks.find import trigger_document_indexer
1413

1514

@@ -20,14 +19,13 @@ def document_post_save(sender, instance, **kwargs): # pylint: disable=unused-ar
2019
Note : Within the transaction we can have an empty content and a serialization
2120
error.
2221
"""
23-
if default_document_indexer() is not None:
24-
transaction.on_commit(partial(trigger_document_indexer, instance))
22+
transaction.on_commit(partial(trigger_document_indexer, instance))
2523

2624

2725
@receiver(signals.post_save, sender=models.DocumentAccess)
2826
def document_access_post_save(sender, instance, created, **kwargs): # pylint: disable=unused-argument
2927
"""
3028
Asynchronous call to the document indexer at the end of the transaction.
3129
"""
32-
if not created and default_document_indexer() is not None:
30+
if not created:
3331
transaction.on_commit(partial(trigger_document_indexer, instance.document))

src/backend/core/tasks/find.py

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,21 @@
1010
logger = getLogger(__file__)
1111

1212

13-
def document_indexer_debounce_key(document_id):
14-
"""Returns debounce cache key"""
15-
return f"doc-indexer-debounce-{document_id}"
16-
17-
18-
def incr_counter(key):
13+
def indexer_debounce_lock(document_id):
1914
"""Increase or reset counter"""
15+
key = f"doc-indexer-debounce-{document_id}"
16+
2017
try:
2118
return cache.incr(key)
2219
except ValueError:
2320
cache.set(key, 1)
2421
return 1
2522

2623

27-
def decr_counter(key):
24+
def indexer_debounce_release(document_id):
2825
"""Decrease or reset counter"""
26+
key = f"doc-indexer-debounce-{document_id}"
27+
2928
try:
3029
return cache.decr(key)
3130
except ValueError:
@@ -36,24 +35,26 @@ def decr_counter(key):
3635
@app.task
3736
def document_indexer_task(document_id):
3837
"""Celery Task : Sends indexation query for a document."""
39-
key = document_indexer_debounce_key(document_id)
38+
# Prevents some circular imports
39+
# pylint: disable=import-outside-toplevel
40+
from core import models # noqa : PLC0415
41+
from core.services.search_indexers import ( # noqa : PLC0415
42+
get_batch_accesses_by_users_and_teams,
43+
get_document_indexer,
44+
)
4045

4146
# check if the counter : if still up, skip the task. only the last one
4247
# within the countdown delay will do the query.
43-
if decr_counter(key) > 0:
48+
if indexer_debounce_release(document_id) > 0:
4449
logger.info("Skip document %s indexation", document_id)
4550
return
4651

47-
# Prevents some circular imports
48-
# pylint: disable=import-outside-toplevel
49-
from core import models # noqa: PLC0415
50-
from core.services.search_indexers import ( # noqa: PLC0415
51-
get_batch_accesses_by_users_and_teams,
52-
get_document_indexer_class,
53-
)
52+
indexer = get_document_indexer()
53+
54+
if indexer is None:
55+
return
5456

5557
doc = models.Document.objects.get(pk=document_id)
56-
indexer = get_document_indexer_class()()
5758
accesses = get_batch_accesses_by_users_and_teams((doc.path,))
5859

5960
data = indexer.serialize_document(document=doc, accesses=accesses)
@@ -69,11 +70,7 @@ def trigger_document_indexer(document):
6970
Args:
7071
document (Document): The document instance.
7172
"""
72-
if document.deleted_at or document.ancestors_deleted_at:
73-
return
74-
75-
key = document_indexer_debounce_key(document.pk)
76-
countdown = getattr(settings, "SEARCH_INDEXER_COUNTDOWN", 1)
73+
countdown = settings.SEARCH_INDEXER_COUNTDOWN
7774

7875
logger.info(
7976
"Add task for document %s indexation in %.2f seconds",
@@ -83,6 +80,6 @@ def trigger_document_indexer(document):
8380

8481
# Each time this method is called during the countdown, we increment the
8582
# counter and each task decrease it, so the index be run only once.
86-
incr_counter(key)
83+
indexer_debounce_lock(document.pk)
8784

8885
document_indexer_task.apply_async(args=[document.pk], countdown=countdown)

src/backend/core/tests/conftest.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,10 @@ def indexer_settings_fixture(settings):
3434

3535
# pylint: disable-next=import-outside-toplevel
3636
from core.services.search_indexers import ( # noqa: PLC0415
37-
default_document_indexer,
38-
get_document_indexer_class,
37+
get_document_indexer,
3938
)
4039

41-
default_document_indexer.cache_clear()
42-
get_document_indexer_class.cache_clear()
40+
get_document_indexer.cache_clear()
4341

4442
settings.SEARCH_INDEXER_CLASS = "core.services.search_indexers.FindDocumentIndexer"
4543
settings.SEARCH_INDEXER_SECRET = "ThisIsAKeyForTest"
@@ -51,5 +49,4 @@ def indexer_settings_fixture(settings):
5149
yield settings
5250

5351
# clear cache to prevent issues with other tests
54-
default_document_indexer.cache_clear()
55-
get_document_indexer_class.cache_clear()
52+
get_document_indexer.cache_clear()

src/backend/core/tests/documents/test_api_documents_search.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from rest_framework.test import APIClient
1111

1212
from core import factories, models
13-
from core.services.search_indexers import default_document_indexer
13+
from core.services.search_indexers import get_document_indexer
1414

1515
fake = Faker()
1616
pytestmark = pytest.mark.django_db
@@ -54,7 +54,7 @@ def test_api_documents_search_endpoint_is_none(indexer_settings):
5454
"""
5555
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
5656

57-
assert default_document_indexer() is None
57+
assert get_document_indexer() is None
5858

5959
user = factories.UserFactory()
6060
document = factories.DocumentFactory(title="alpha")
@@ -130,7 +130,7 @@ def test_api_documents_search_format(indexer_settings):
130130
"""Validate the format of documents as returned by the search view."""
131131
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
132132

133-
assert default_document_indexer() is not None
133+
assert get_document_indexer() is not None
134134

135135
user = factories.UserFactory()
136136

@@ -193,7 +193,7 @@ def test_api_documents_search_pagination(indexer_settings):
193193
"""Documents should be ordered by descending "updated_at" by default"""
194194
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
195195

196-
assert default_document_indexer() is not None
196+
assert get_document_indexer() is not None
197197

198198
user = factories.UserFactory()
199199

0 commit comments

Comments
 (0)