8
8
from django .conf import settings
9
9
from django .contrib .auth .models import AnonymousUser
10
10
from django .core .exceptions import ImproperlyConfigured
11
+ from django .db .models import Subquery
11
12
from django .utils .module_loading import import_string
12
13
13
14
import requests
18
19
19
20
20
21
@cache
21
- def get_document_indexer_class () -> "BaseDocumentIndexer" :
22
def default_document_indexer():
    """
    Build an instance of the configured document indexer, or return None.

    None is returned in two situations:
    - the SEARCH_INDEXER_CLASS setting is missing or empty (logged at info
      level, since running without an indexer is a supported setup);
    - resolving or instantiating the backend raises ImproperlyConfigured
      (logged at error level).
    """
    configured_class = getattr(settings, "SEARCH_INDEXER_CLASS", None)

    if configured_class:
        try:
            indexer_class = get_document_indexer_class()
            return indexer_class()
        except ImproperlyConfigured as err:
            logger.error("Document indexer is not properly configured : %s", err)
            return None

    # No indexer configured: this is an accepted configuration, not an error.
    logger.info("Document indexer is not configured (see SEARCH_INDEXER_CLASS)")
    return None
35
+
36
+
37
+ @cache
38
+ def get_document_indexer_class ():
22
39
"""Return the indexer backend class based on the settings."""
23
40
classpath = settings .SEARCH_INDEXER_CLASS
24
41
@@ -65,7 +82,7 @@ def get_batch_accesses_by_users_and_teams(paths):
65
82
return dict (access_by_document_path )
66
83
67
84
68
- def get_visited_document_ids_of (user ):
85
+ def get_visited_document_ids_of (queryset , user ):
69
86
"""
70
87
Returns the ids of the documents that have a linktrace to the user and NOT owned.
71
88
It will be used to limit the opensearch responses to the public documents already
@@ -74,11 +91,18 @@ def get_visited_document_ids_of(user):
74
91
if isinstance (user , AnonymousUser ):
75
92
return []
76
93
77
- qs = models .LinkTrace .objects .filter (user = user ).exclude (
78
- document__accesses__user = user ,
94
+ qs = models .LinkTrace .objects .filter (user = user )
95
+
96
+ docs = (
97
+ queryset .exclude (accesses__user = user )
98
+ .filter (
99
+ deleted_at__isnull = True ,
100
+ ancestors_deleted_at__isnull = True ,
101
+ )
102
+ .filter (pk__in = Subquery (qs .values ("document_id" )))
79
103
)
80
104
81
- return list ({str (id ) for id in qs .values_list ("document_id " , flat = True )})
105
+ return list ({str (id ) for id in docs .values_list ("pk " , flat = True )})
82
106
83
107
84
108
class BaseDocumentIndexer (ABC ):
@@ -159,22 +183,41 @@ def push(self, data):
159
183
Must be implemented by subclasses.
160
184
"""
161
185
162
# pylint: disable-next=too-many-arguments,too-many-positional-arguments
def search(self, text, token, visited=(), page=1, page_size=50):
    """
    Search for documents in Find app.

    Results are requested ordered by "updated_at" descending, which is the
    same default ordering as the "Docs" list.

    Args:
        text (str): Text search content.
        token (str): OIDC Authentication token.
        visited (iterable, optional):
            Ids of active public documents with LinkTrace.
            Defaults to an empty tuple (no visited documents).
        page (int, optional):
            The page number to retrieve.
            Defaults to 1 if not specified.
        page_size (int, optional):
            The number of results to return per page.
            Defaults to 50 if not specified.

    Returns:
        list: The "_id" value of each entry returned by `search_query`,
        in the order of the response.
    """
    response = self.search_query(
        data={
            "q": text,
            # Materialise as a list so that any iterable (tuple, set,
            # generator...) ends up as a plain list in the payload.
            "visited": list(visited),
            "services": ["docs"],
            "page_number": page,
            "page_size": page_size,
            "order_by": "updated_at",
            "order_direction": "desc",
        },
        token=token,
    )

    return [d["_id"] for d in response]
178
221
179
222
@abstractmethod
180
223
def search_query (self , data , token ) -> dict :
@@ -184,14 +227,6 @@ def search_query(self, data, token) -> dict:
184
227
Must be implemented by subclasses.
185
228
"""
186
229
187
- @abstractmethod
188
- def format_response (self , data : dict ):
189
- """
190
- Convert the JSON response from Find app as document queryset.
191
-
192
- Must be implemented by subclasses.
193
- """
194
-
195
230
196
231
class FindDocumentIndexer (BaseDocumentIndexer ):
197
232
"""
@@ -253,20 +288,13 @@ def search_query(self, data, token) -> requests.Response:
253
288
logger .error ("HTTPError: %s" , e )
254
289
raise
255
290
256
- def format_response (self , data : dict ):
257
- """
258
- Retrieve documents ids from Find app response and return a queryset.
259
- """
260
- return models .Document .objects .filter (pk__in = [d ["_id" ] for d in data ])
261
-
262
291
def push (self , data ):
263
292
"""
264
293
Push a batch of documents to the Find backend.
265
294
266
295
Args:
267
296
data (list): List of document dictionaries.
268
297
"""
269
-
270
298
try :
271
299
response = requests .post (
272
300
self .indexer_url ,
0 commit comments