@@ -170,6 +170,8 @@ export async function performExtraction(
170
170
] ,
171
171
} ) ;
172
172
173
+ let startMap = Date . now ( ) ;
174
+ let aggMapLinks : string [ ] = [ ] ;
173
175
// Process URLs
174
176
const urlPromises = request . urls . map ( ( url ) =>
175
177
processUrl (
@@ -184,9 +186,20 @@ export async function performExtraction(
184
186
includeSubdomains : request . includeSubdomains ,
185
187
schema : request . schema ,
186
188
} ,
187
- urlTraces ,
188
- ) ,
189
- ) ;
189
+ urlTraces ,
190
+ ( links : string [ ] ) => {
191
+ aggMapLinks . push ( ...links ) ;
192
+ updateExtract ( extractId , {
193
+ steps : [
194
+ {
195
+ step : ExtractStep . MAP ,
196
+ startedAt : startMap ,
197
+ finishedAt : Date . now ( ) ,
198
+ discoveredLinks : aggMapLinks ,
199
+ } ,
200
+ ] ,
201
+ } ) ;
202
+ } ) ) ;
190
203
191
204
const processedUrls = await Promise . all ( urlPromises ) ;
192
205
const links = processedUrls . flat ( ) . filter ( ( url ) => url ) ;
@@ -205,8 +218,8 @@ export async function performExtraction(
205
218
status : "processing" ,
206
219
steps : [
207
220
{
208
- step : ExtractStep . MAP ,
209
- startedAt : Date . now ( ) ,
221
+ step : ExtractStep . MAP_RERANK ,
222
+ startedAt : startMap ,
210
223
finishedAt : Date . now ( ) ,
211
224
discoveredLinks : links ,
212
225
} ,
@@ -221,6 +234,7 @@ export async function performExtraction(
221
234
// if so, it splits the results into 2 types of completions:
222
235
// 1. the first one is a completion that will extract the array of items
223
236
// 2. the second one is multiple completions that will extract the items from the array
237
+ let startAnalyze = Date . now ( ) ;
224
238
const { isMultiEntity, multiEntityKeys, reasoning, keyIndicators } =
225
239
await analyzeSchemaAndPrompt ( links , request . schema , request . prompt ?? "" ) ;
226
240
@@ -239,7 +253,7 @@ export async function performExtraction(
239
253
steps : [
240
254
{
241
255
step : ExtractStep . MULTI_ENTITY ,
242
- startedAt : Date . now ( ) ,
256
+ startedAt : startAnalyze ,
243
257
finishedAt : Date . now ( ) ,
244
258
discoveredLinks : [ ] ,
245
259
} ,
@@ -254,12 +268,14 @@ export async function performExtraction(
254
268
steps : [
255
269
{
256
270
step : ExtractStep . MULTI_ENTITY_SCRAPE ,
257
- startedAt : Date . now ( ) ,
271
+ startedAt : startAnalyze ,
258
272
finishedAt : Date . now ( ) ,
259
273
discoveredLinks : links ,
260
274
} ,
261
275
] ,
262
276
} ) ;
277
+
278
+ let startScrape = Date . now ( ) ;
263
279
const scrapePromises = links . map ( ( url ) => {
264
280
if ( ! docsMap . has ( url ) ) {
265
281
return scrapeDocument (
@@ -280,6 +296,20 @@ export async function performExtraction(
280
296
( doc ) : doc is Document => doc !== null ,
281
297
) ;
282
298
299
+ let endScrape = Date . now ( ) ;
300
+
301
+ await updateExtract ( extractId , {
302
+ status : "processing" ,
303
+ steps : [
304
+ {
305
+ step : ExtractStep . MULTI_ENTITY_SCRAPE ,
306
+ startedAt : startScrape ,
307
+ finishedAt : endScrape ,
308
+ discoveredLinks : links ,
309
+ } ,
310
+ ] ,
311
+ } ) ;
312
+
283
313
for ( const doc of multyEntityDocs ) {
284
314
if ( doc ?. metadata ?. url ) {
285
315
docsMap . set ( doc . metadata . url , doc ) ;
@@ -352,7 +382,7 @@ export async function performExtraction(
352
382
steps : [
353
383
{
354
384
step : ExtractStep . MULTI_ENTITY_EXTRACT ,
355
- startedAt : Date . now ( ) ,
385
+ startedAt : startScrape ,
356
386
finishedAt : Date . now ( ) ,
357
387
discoveredLinks : [ doc . metadata . url || doc . metadata . sourceURL || "" ] ,
358
388
} ,
0 commit comments