@@ -158,27 +158,69 @@ const rebuildData = async ({
158158
159159 const deleteVectorIdList = mongoData . indexes . map ( ( index ) => index . dataId ) ;
160160
161- const { tokens } = await mongoSessionRun ( async ( session ) => {
162- // update vector, update dataset.data rebuilding status, delete data from training
163- const updateResult = await Promise . all (
164- mongoData . indexes . map ( async ( index , i ) => {
165- const result = await insertDatasetDataVector ( {
166- query : index . text ,
167- model : getVectorModel ( trainingData . model ) ,
168- teamId : mongoData . teamId ,
169- datasetId : mongoData . datasetId ,
170- collectionId : mongoData . collectionId
171- } ) ;
172- mongoData . indexes [ i ] . dataId = result . insertId ;
173- return result ;
174- } )
175- ) ;
161+ // Find next rebuilding data to insert training queue
162+ await mongoSessionRun ( async ( session ) => {
163+ // get new mongoData insert to training
164+ const newRebuildingData = await MongoDatasetData . findOneAndUpdate (
165+ {
166+ teamId : mongoData . teamId ,
167+ datasetId : mongoData . datasetId ,
168+ rebuilding : true
169+ } ,
170+ {
171+ $unset : {
172+ rebuilding : null
173+ } ,
174+ updateTime : new Date ( )
175+ } ,
176+ { session }
177+ ) . select ( {
178+ _id : 1 ,
179+ collectionId : 1
180+ } ) ;
176181
177- // Ensure that the training data is deleted after the Mongo update is successful
182+ if ( newRebuildingData ) {
183+ await MongoDatasetTraining . create (
184+ [
185+ {
186+ teamId : mongoData . teamId ,
187+ tmbId : trainingData . tmbId ,
188+ datasetId : mongoData . datasetId ,
189+ collectionId : newRebuildingData . collectionId ,
190+ billId : trainingData . billId ,
191+ mode : TrainingModeEnum . chunk ,
192+ model : trainingData . model ,
193+ q : '1' ,
194+ dataId : newRebuildingData . _id
195+ }
196+ ] ,
197+ { session }
198+ ) ;
199+ }
200+ } ) ;
201+
202+ // update vector, update dataset_data rebuilding status, delete data from training
203+ // 1. Insert new vector to dataset_data
204+ const updateResult = await Promise . all (
205+ mongoData . indexes . map ( async ( index , i ) => {
206+ const result = await insertDatasetDataVector ( {
207+ query : index . text ,
208+ model : getVectorModel ( trainingData . model ) ,
209+ teamId : mongoData . teamId ,
210+ datasetId : mongoData . datasetId ,
211+ collectionId : mongoData . collectionId
212+ } ) ;
213+ mongoData . indexes [ i ] . dataId = result . insertId ;
214+ return result ;
215+ } )
216+ ) ;
217+ const { tokens } = await mongoSessionRun ( async ( session ) => {
218+ // 2. Ensure that the training data is deleted after the Mongo update is successful
178219 await mongoData . save ( { session } ) ;
220+ // 3. Delete the training data
179221 await trainingData . deleteOne ( { session } ) ;
180222
181- // delete old vector
223+ // 4. Delete old vector
182224 await deleteDatasetDataVector ( {
183225 teamId : mongoData . teamId ,
184226 idList : deleteVectorIdList
@@ -189,59 +231,6 @@ const rebuildData = async ({
189231 } ;
190232 } ) ;
191233
192- // find next data insert to training queue
193- const arr = new Array ( 5 ) . fill ( 0 ) ;
194-
195- for await ( const _ of arr ) {
196- try {
197- const hasNextData = await mongoSessionRun ( async ( session ) => {
198- // get new mongoData insert to training
199- const newRebuildingData = await MongoDatasetData . findOneAndUpdate (
200- {
201- teamId : mongoData . teamId ,
202- datasetId : mongoData . datasetId ,
203- rebuilding : true
204- } ,
205- {
206- $unset : {
207- rebuilding : null
208- } ,
209- updateTime : new Date ( )
210- } ,
211- { session }
212- ) . select ( {
213- _id : 1 ,
214- collectionId : 1
215- } ) ;
216-
217- if ( newRebuildingData ) {
218- await MongoDatasetTraining . create (
219- [
220- {
221- teamId : mongoData . teamId ,
222- tmbId : trainingData . tmbId ,
223- datasetId : mongoData . datasetId ,
224- collectionId : newRebuildingData . collectionId ,
225- billId : trainingData . billId ,
226- mode : TrainingModeEnum . chunk ,
227- model : trainingData . model ,
228- q : '1' ,
229- dataId : newRebuildingData . _id
230- }
231- ] ,
232- { session }
233- ) ;
234- }
235-
236- return ! ! newRebuildingData ;
237- } ) ;
238-
239- if ( ! hasNextData ) {
240- break ;
241- }
242- } catch ( error ) { }
243- }
244-
245234 return { tokens } ;
246235} ;
247236
0 commit comments