|
21 | 21 | logger = getLogger(__name__) |
22 | 22 |
|
23 | 23 |
|
24 | | -class _ApifyRequestQueueSingleClient: |
| 24 | +class ApifyRequestQueueSingleClient: |
25 | 25 | """An Apify platform implementation of the request queue client with limited capability. |
26 | 26 |
|
27 | 27 | This client is designed to use as few resources as possible, but has to be used in a constrained context. |
@@ -108,23 +108,19 @@ async def add_batch_of_requests( |
108 | 108 | # Check if request is known to be already handled (it has to be present as well.) |
109 | 109 | if request.unique_key in self._requests_already_handled: |
110 | 110 | already_present_requests.append( |
111 | | - ProcessedRequest.model_validate( |
112 | | - { |
113 | | - 'uniqueKey': request.unique_key, |
114 | | - 'wasAlreadyPresent': True, |
115 | | - 'wasAlreadyHandled': True, |
116 | | - } |
| 111 | + ProcessedRequest( |
| 112 | + unique_key=request.unique_key, |
| 113 | + was_already_present=True, |
| 114 | + was_already_handled=True, |
117 | 115 | ) |
118 | 116 | ) |
119 | 117 | # Check if request is known to be already present, but unhandled |
120 | 118 | elif self._requests_cache.get(request.unique_key): |
121 | 119 | already_present_requests.append( |
122 | | - ProcessedRequest.model_validate( |
123 | | - { |
124 | | - 'uniqueKey': request.unique_key, |
125 | | - 'wasAlreadyPresent': True, |
126 | | - 'wasAlreadyHandled': request.was_already_handled, |
127 | | - } |
| 120 | + ProcessedRequest( |
| 121 | + unique_key=request.unique_key, |
| 122 | + was_already_present=True, |
| 123 | + was_already_handled=request.was_already_handled, |
128 | 124 | ) |
129 | 125 | ) |
130 | 126 | else: |
@@ -158,8 +154,9 @@ async def add_batch_of_requests( |
158 | 154 | self._requests_cache.pop(unprocessed_request.unique_key, None) |
159 | 155 |
|
160 | 156 | else: |
161 | | - api_response = AddRequestsResponse.model_validate( |
162 | | - {'unprocessedRequests': [], 'processedRequests': already_present_requests} |
| 157 | + api_response = AddRequestsResponse( |
| 158 | + unprocessed_requests=[], |
| 159 | + processed_requests=already_present_requests, |
163 | 160 | ) |
164 | 161 |
|
165 | 162 | # Update assumed total count for newly added requests. |
@@ -236,28 +233,41 @@ async def _list_head(self) -> None: |
236 | 233 |
|
237 | 234 | # Update the cached data |
238 | 235 | for request_data in response.get('items', []): |
| 236 | + # Due to https://github.com/apify/apify-core/blob/v0.1377.0/src/api/src/lib/request_queues/request_queue.ts#L53, |
| 237 | + # the list_head endpoint may return truncated fields for long requests (e.g., long URLs or unique keys). |
| 238 | + # If truncation is detected, fetch the full request data by its ID from the API. |
| 239 | + # This is a temporary workaround - the caching will be refactored to use request IDs instead of unique keys. |
| 240 | + # See https://github.com/apify/apify-sdk-python/issues/630 for details. |
| 241 | + if '[truncated]' in request_data['uniqueKey'] or '[truncated]' in request_data['url']: |
| 242 | + request_data = await self._api_client.get_request(request_id=request_data['id']) # noqa: PLW2901 |
| 243 | + |
239 | 244 | request = Request.model_validate(request_data) |
240 | 245 |
|
241 | 246 | if request.unique_key in self._requests_in_progress: |
242 | 247 | # Ignore requests that are already in progress, we will not process them again. |
243 | 248 | continue |
| 249 | + |
244 | 250 | if request.was_already_handled: |
245 | 251 | # Do not cache fully handled requests, we do not need them. Just cache their unique_key. |
246 | 252 | self._requests_already_handled.add(request.unique_key) |
247 | 253 | else: |
248 | 254 | # Only fetch the request if we do not know it yet. |
249 | 255 | if request.unique_key not in self._requests_cache: |
250 | 256 | request_id = unique_key_to_request_id(request.unique_key) |
251 | | - complete_request_data = await self._api_client.get_request(request_id) |
252 | 257 |
|
253 | | - if complete_request_data is not None: |
254 | | - request = Request.model_validate(complete_request_data) |
255 | | - self._requests_cache[request.unique_key] = request |
256 | | - else: |
| 258 | + if request_data is not None and request_id != request_data['id']: |
257 | 259 | logger.warning( |
258 | | - f'Could not fetch request data for unique_key=`{request.unique_key}` (id=`{request_id}`)' |
| 260 | + f'Request ID mismatch: {request_id} != {request_data["id"]}, ' |
| 261 | + 'this may cause unexpected behavior.' |
259 | 262 | ) |
260 | 263 |
|
| 264 | + # See https://github.com/apify/apify-sdk-python/issues/630 for details. |
| 265 | + if '[truncated]' not in request.unique_key: |
| 266 | + request_data = await self._api_client.get_request(request_id=request_id) # noqa: PLW2901 |
| 267 | + request = Request.model_validate(request_data) |
| 268 | + |
| 269 | + self._requests_cache[request.unique_key] = request |
| 270 | + |
261 | 271 | # Add new requests to the end of the head, unless already present in head |
262 | 272 | if request.unique_key not in self._head_requests: |
263 | 273 | self._head_requests.appendleft(request.unique_key) |
|
0 commit comments