Skip to content

Commit e860c56

Browse files
authored
perf: delete dataset (#3949)
* fix: collection list count
* fix: collection list count
* ai proxy ui
* perf: delete dataset
* perf: add dataset text index
* update doc
1 parent efac531 commit e860c56

File tree

7 files changed

+85
-74
lines changed

7 files changed

+85
-74
lines changed

docSite/content/zh-cn/docs/development/upgrading/4823.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,5 @@ curl --location --request POST 'https://{{host}}/api/admin/initv4823' \
5050
2. 暂时移除 md 阅读优化,避免链接分割错误。
5151
3. 离开团队时,未刷新成员列表。
5252
4. PPTX 编码错误,导致解析失败。
53-
5. 删除知识库单条数据时,全文索引未跟随删除。
53+
5. 删除知识库单条数据时,全文索引未跟随删除。
54+
6. 修复 Mongo Dataset text 索引在查询数据时未生效。

packages/service/common/file/image/controller.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ export async function delImgByRelatedId({
118118
}: {
119119
teamId: string;
120120
relateIds: string[];
121-
session: ClientSession;
121+
session?: ClientSession;
122122
}) {
123123
if (relateIds.length === 0) return;
124124

packages/service/core/dataset/collection/controller.ts

Lines changed: 52 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import { MongoImage } from '../../../common/file/image/schema';
2525
import { hashStr } from '@fastgpt/global/common/string/tools';
2626
import { addDays } from 'date-fns';
2727
import { MongoDatasetDataText } from '../data/dataTextSchema';
28+
import { delay, retryFn } from '@fastgpt/global/common/system/utils';
2829

2930
export const createCollectionAndInsertData = async ({
3031
dataset,
@@ -234,7 +235,7 @@ export const delCollectionRelatedSource = async ({
234235
relatedImgId?: string;
235236
};
236237
}[];
237-
session: ClientSession;
238+
session?: ClientSession;
238239
}) => {
239240
if (collections.length === 0) return;
240241

@@ -282,47 +283,55 @@ export async function delCollection({
282283
const datasetIds = Array.from(new Set(collections.map((item) => String(item.datasetId))));
283284
const collectionIds = collections.map((item) => String(item._id));
284285

285-
// Delete training data
286-
await MongoDatasetTraining.deleteMany({
287-
teamId,
288-
datasetId: { $in: datasetIds },
289-
collectionId: { $in: collectionIds }
286+
await retryFn(async () => {
287+
await Promise.all([
288+
// Delete training data
289+
MongoDatasetTraining.deleteMany({
290+
teamId,
291+
datasetId: { $in: datasetIds },
292+
collectionId: { $in: collectionIds }
293+
}),
294+
// Delete dataset_data_texts
295+
MongoDatasetDataText.deleteMany({
296+
teamId,
297+
datasetId: { $in: datasetIds },
298+
collectionId: { $in: collectionIds }
299+
}),
300+
// Delete dataset_datas
301+
MongoDatasetData.deleteMany({
302+
teamId,
303+
datasetId: { $in: datasetIds },
304+
collectionId: { $in: collectionIds }
305+
}),
306+
...(delImg
307+
? [
308+
delImgByRelatedId({
309+
teamId,
310+
relateIds: collections
311+
.map((item) => item?.metadata?.relatedImgId || '')
312+
.filter(Boolean)
313+
})
314+
]
315+
: []),
316+
...(delFile
317+
? [
318+
delFileByFileIdList({
319+
bucketName: BucketNameEnum.dataset,
320+
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
321+
})
322+
]
323+
: []),
324+
// Delete vector data
325+
deleteDatasetDataVector({ teamId, datasetIds, collectionIds })
326+
]);
327+
328+
// delete collections
329+
await MongoDatasetCollection.deleteMany(
330+
{
331+
teamId,
332+
_id: { $in: collectionIds }
333+
},
334+
{ session }
335+
);
290336
});
291-
292-
if (delImg) {
293-
await delImgByRelatedId({
294-
teamId,
295-
relateIds: collections.map((item) => item?.metadata?.relatedImgId || '').filter(Boolean),
296-
session
297-
});
298-
}
299-
if (delFile) {
300-
await delFileByFileIdList({
301-
bucketName: BucketNameEnum.dataset,
302-
fileIdList: collections.map((item) => item?.fileId || '').filter(Boolean)
303-
});
304-
}
305-
306-
// Delete dataset_datas
307-
await MongoDatasetData.deleteMany(
308-
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
309-
{ session }
310-
);
311-
// Delete dataset_data_texts
312-
await MongoDatasetDataText.deleteMany(
313-
{ teamId, datasetId: { $in: datasetIds }, collectionId: { $in: collectionIds } },
314-
{ session }
315-
);
316-
317-
// delete collections
318-
await MongoDatasetCollection.deleteMany(
319-
{
320-
teamId,
321-
_id: { $in: collectionIds }
322-
},
323-
{ session }
324-
);
325-
326-
// no session delete: delete files, vector data
327-
await deleteDatasetDataVector({ teamId, datasetIds, collectionIds });
328337
}

packages/service/core/dataset/controller.ts

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { MongoDatasetData } from './data/schema';
88
import { deleteDatasetDataVector } from '../../common/vectorStore/controller';
99
import { MongoDatasetDataText } from './data/dataTextSchema';
1010
import { DatasetErrEnum } from '@fastgpt/global/common/error/code/dataset';
11+
import { retryFn } from '@fastgpt/global/common/system/utils';
1112

1213
/* ============= dataset ========== */
1314
/* find all datasetId by top datasetId */
@@ -78,40 +79,39 @@ export async function delDatasetRelevantData({
7879

7980
const datasetIds = datasets.map((item) => item._id);
8081

81-
// delete training data
82-
await MongoDatasetTraining.deleteMany({
83-
teamId,
84-
datasetId: { $in: datasetIds }
85-
});
86-
8782
// Get _id, teamId, fileId, metadata.relatedImgId for all collections
8883
const collections = await MongoDatasetCollection.find(
8984
{
9085
teamId,
9186
datasetId: { $in: datasetIds }
9287
},
93-
'_id teamId datasetId fileId metadata',
94-
{ session }
88+
'_id teamId datasetId fileId metadata'
9589
).lean();
9690

97-
// Delete Image and file
98-
await delCollectionRelatedSource({ collections, session });
91+
await retryFn(async () => {
92+
await Promise.all([
93+
// delete training data
94+
MongoDatasetTraining.deleteMany({
95+
teamId,
96+
datasetId: { $in: datasetIds }
97+
}),
98+
//Delete dataset_data_texts
99+
MongoDatasetDataText.deleteMany({
100+
teamId,
101+
datasetId: { $in: datasetIds }
102+
}),
103+
//delete dataset_datas
104+
MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } }),
105+
// Delete Image and file
106+
delCollectionRelatedSource({ collections }),
107+
// Delete vector data
108+
deleteDatasetDataVector({ teamId, datasetIds })
109+
]);
110+
});
99111

100112
// delete collections
101113
await MongoDatasetCollection.deleteMany({
102114
teamId,
103115
datasetId: { $in: datasetIds }
104116
}).session(session);
105-
106-
// No session delete:
107-
// Delete dataset_data_texts
108-
await MongoDatasetDataText.deleteMany({
109-
teamId,
110-
datasetId: { $in: datasetIds }
111-
});
112-
// delete dataset_datas
113-
await MongoDatasetData.deleteMany({ teamId, datasetId: { $in: datasetIds } });
114-
115-
// Delete vector data
116-
await deleteDatasetDataVector({ teamId, datasetIds });
117117
}

packages/service/core/dataset/data/dataTextSchema.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ try {
4040
default_language: 'none'
4141
}
4242
);
43+
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
4344
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
4445
} catch (error) {
4546
console.log(error);

projects/app/src/pageComponents/account/model/Log/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ const LogDetail = ({ data, onClose }: { data: LogDetailType; onClose: () => void
404404
</GridItem>
405405
)}
406406
{detailData?.response_body && (
407-
<GridItem display={'flex'} borderBottomWidth="1px" borderRightWidth="1px" colSpan={2}>
407+
<GridItem display={'flex'} colSpan={2}>
408408
<Title>Response Body</Title>
409409
<Container>{detailData?.response_body}</Container>
410410
</GridItem>

projects/app/src/pages/api/core/dataset/delete.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,17 @@ async function handler(req: NextApiRequest) {
3434
});
3535
const datasetIds = datasets.map((d) => d._id);
3636

37+
// delete collection.tags
38+
await MongoDatasetCollectionTags.deleteMany({
39+
teamId,
40+
datasetId: { $in: datasetIds }
41+
});
42+
3743
// delete all dataset.data and pg data
3844
await mongoSessionRun(async (session) => {
3945
// delete dataset data
4046
await delDatasetRelevantData({ datasets, session });
4147

42-
// delete collection.tags
43-
await MongoDatasetCollectionTags.deleteMany({
44-
teamId,
45-
datasetId: { $in: datasetIds }
46-
}).session(session);
47-
4848
// delete dataset
4949
await MongoDataset.deleteMany(
5050
{

0 commit comments

Comments
 (0)