Skip to content

Commit 30f83f8

Browse files
committed
perf: chunk read (#4109)
* package * perf: chunk read
1 parent ac7091f commit 30f83f8

File tree

29 files changed

+415
-430
lines changed

29 files changed

+415
-430
lines changed

docSite/content/zh-cn/docs/development/upgrading/491.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ weight: 799
1010
## 🚀 新增内容
1111

1212
1. 商业版支持单团队模式,更好的管理内部成员。
13+
2. 知识库分块阅读器。
1314

1415
## ⚙️ 优化
1516

packages/global/core/dataset/data/constants.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,23 @@ export const DatasetDataIndexMap: Record<
1616
}
1717
> = {
1818
[DatasetDataIndexTypeEnum.default]: {
19-
label: i18nT('dataset:data_index_default'),
19+
label: i18nT('common:data_index_default'),
2020
color: 'gray'
2121
},
2222
[DatasetDataIndexTypeEnum.custom]: {
23-
label: i18nT('dataset:data_index_custom'),
23+
label: i18nT('common:data_index_custom'),
2424
color: 'blue'
2525
},
2626
[DatasetDataIndexTypeEnum.summary]: {
27-
label: i18nT('dataset:data_index_summary'),
27+
label: i18nT('common:data_index_summary'),
2828
color: 'green'
2929
},
3030
[DatasetDataIndexTypeEnum.question]: {
31-
label: i18nT('dataset:data_index_question'),
31+
label: i18nT('common:data_index_question'),
3232
color: 'red'
3333
},
3434
[DatasetDataIndexTypeEnum.image]: {
35-
label: i18nT('dataset:data_index_image'),
35+
label: i18nT('common:data_index_image'),
3636
color: 'purple'
3737
}
3838
};

packages/service/core/chat/saveChat.ts

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -74,41 +74,42 @@ export async function saveChat({
7474
(node) => node.flowNodeType === FlowNodeTypeEnum.pluginInput
7575
)?.inputs;
7676

77-
await mongoSessionRun(async (session) => {
78-
const processedContent = content.map((item) => {
79-
if (item.obj === ChatRoleEnum.AI) {
80-
const nodeResponse = item[DispatchNodeResponseKeyEnum.nodeResponse];
77+
// Format save chat content: Remove quote q/a
78+
const processedContent = content.map((item) => {
79+
if (item.obj === ChatRoleEnum.AI) {
80+
const nodeResponse = item[DispatchNodeResponseKeyEnum.nodeResponse];
8181

82-
if (nodeResponse) {
83-
return {
84-
...item,
85-
[DispatchNodeResponseKeyEnum.nodeResponse]: nodeResponse.map((responseItem) => {
86-
if (
87-
responseItem.moduleType === FlowNodeTypeEnum.datasetSearchNode &&
88-
responseItem.quoteList
89-
) {
90-
return {
91-
...item,
92-
quoteList: responseItem.quoteList.map((quote: any) => ({
93-
id: quote.id,
94-
chunkIndex: quote.chunkIndex,
95-
datasetId: quote.datasetId,
96-
collectionId: quote.collectionId,
97-
sourceId: quote.sourceId,
98-
sourceName: quote.sourceName,
99-
score: quote.score,
100-
tokens: quote.tokens
101-
}))
102-
};
103-
}
104-
return item;
105-
})
106-
};
107-
}
82+
if (nodeResponse) {
83+
return {
84+
...item,
85+
[DispatchNodeResponseKeyEnum.nodeResponse]: nodeResponse.map((responseItem) => {
86+
if (
87+
responseItem.moduleType === FlowNodeTypeEnum.datasetSearchNode &&
88+
responseItem.quoteList
89+
) {
90+
return {
91+
...responseItem,
92+
quoteList: responseItem.quoteList.map((quote: any) => ({
93+
id: quote.id,
94+
chunkIndex: quote.chunkIndex,
95+
datasetId: quote.datasetId,
96+
collectionId: quote.collectionId,
97+
sourceId: quote.sourceId,
98+
sourceName: quote.sourceName,
99+
score: quote.score,
100+
tokens: quote.tokens
101+
}))
102+
};
103+
}
104+
return responseItem;
105+
})
106+
};
108107
}
109-
return item;
110-
});
108+
}
109+
return item;
110+
});
111111

112+
await mongoSessionRun(async (session) => {
112113
const [{ _id: chatItemIdHuman }, { _id: chatItemIdAi }] = await MongoChatItem.insertMany(
113114
processedContent.map((item) => ({
114115
chatId,

packages/service/core/dataset/data/schema.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,11 @@ try {
9898
chunkIndex: 1,
9999
updateTime: -1
100100
});
101-
// FullText tmp full text index
102-
// DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
103101
// Recall vectors after data matching
104102
DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
105103
DatasetDataSchema.index({ updateTime: 1 });
106104
// rebuild data
107105
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
108-
109-
DatasetDataSchema.index({ initFullText: 1 });
110106
} catch (error) {
111107
console.log(error);
112108
}
Lines changed: 1 addition & 1 deletion
Loading

packages/web/i18n/en/chat.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"custom_input_guide_url": "Custom Lexicon URL",
2121
"dataset_quote_type error": "Knowledge base reference type is wrong, correct type: { datasetId: string }[]",
2222
"delete_all_input_guide_confirm": "Are you sure you want to clear the input guide lexicon?",
23+
"download_chunks": "Download data",
2324
"empty_directory": "This directory is empty~",
2425
"file_amount_over": "Exceeded maximum file quantity {{max}}",
2526
"file_input": "File input",
@@ -42,6 +43,7 @@
4243
"query_extension_IO_tokens": "Problem Optimization Input/Output Tokens",
4344
"query_extension_result": "Problem optimization results",
4445
"question_tip": "From top to bottom, the response order of each module",
46+
"read_raw_source": "Open the original text",
4547
"reasoning_text": "Thinking process",
4648
"response.child total points": "Sub-workflow point consumption",
4749
"response.dataset_concat_length": "Combined total",
@@ -52,6 +54,7 @@
5254
"select_img": "Upload Image",
5355
"source_cronJob": "Scheduled execution",
5456
"stream_output": "Stream Output",
57+
"to_dataset": "Go to the Knowledge Base",
5558
"unsupported_file_type": "Unsupported file types",
5659
"upload": "Upload",
5760
"view_citations": "View References",

packages/web/i18n/en/common.json

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -498,12 +498,9 @@
498498
"core.dataset.Dataset": "Dataset",
499499
"core.dataset.Dataset ID": "Dataset ID",
500500
"core.dataset.Delete Confirm": "Confirm to Delete This Dataset? Data Cannot Be Recovered After Deletion, Please Confirm!",
501-
"core.dataset.Download the parsed content": "Download the parsed content",
502501
"core.dataset.Empty Dataset": "Empty Dataset",
503502
"core.dataset.Empty Dataset Tips": "No Dataset Yet, Create One Now!",
504503
"core.dataset.Folder placeholder": "This is a Directory",
505-
"core.dataset.Get the raw data": "Get the raw data",
506-
"core.dataset.Go Dataset": "Go to Dataset",
507504
"core.dataset.Intro Placeholder": "This Dataset Has No Introduction Yet",
508505
"core.dataset.Manual collection": "Manual Dataset",
509506
"core.dataset.My Dataset": "My Dataset",
@@ -832,6 +829,11 @@
832829
"core.workflow.variable": "Variable",
833830
"create": "Create",
834831
"cron_job_run_app": "Scheduled Task",
832+
"data_index_custom": "Custom index",
833+
"data_index_default": "Default index",
834+
"data_index_image": "Image Index",
835+
"data_index_question": "Inferred question index",
836+
"data_index_summary": "Summary Index",
835837
"dataset.Confirm move the folder": "Confirm to Move to This Directory",
836838
"dataset.Confirm to delete the data": "Confirm to Delete This Data?",
837839
"dataset.Confirm to delete the file": "Confirm to Delete This File and All Its Data?",

packages/web/i18n/en/dataset.json

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,7 @@
2727
"custom_data_process_params_desc": "Customize data processing rules",
2828
"data.ideal_chunk_length": "ideal block length",
2929
"data_amount": "{{dataAmount}} Data Items, {{indexAmount}} Indexes",
30-
"data_index_custom": "Custom index",
31-
"data_index_default": "Default index",
32-
"data_index_image": "Image Index",
3330
"data_index_num": "Index {{index}}",
34-
"data_index_question": "Inferred question index",
35-
"data_index_summary": "Summary index",
3631
"data_process_params": "Params",
3732
"data_process_setting": "Processing config",
3833
"dataset.Unsupported operation": "dataset.Unsupported operation",

packages/web/i18n/zh-CN/chat.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"custom_input_guide_url": "自定义词库地址",
2121
"dataset_quote_type error": "知识库引用类型错误,正确类型:{ datasetId: string }[]",
2222
"delete_all_input_guide_confirm": "确定要清空输入引导词库吗?",
23+
"download_chunks": "下载数据",
2324
"empty_directory": "这个目录已经没东西可选了~",
2425
"file_amount_over": "超出最大文件数量 {{max}}",
2526
"file_input": "系统文件",
@@ -42,6 +43,7 @@
4243
"query_extension_IO_tokens": "问题优化输入/输出 Tokens",
4344
"query_extension_result": "问题优化结果",
4445
"question_tip": "从上到下,为各个模块的响应顺序",
46+
"read_raw_source": "打开原文",
4547
"reasoning_text": "思考过程",
4648
"response.child total points": "子工作流积分消耗",
4749
"response.dataset_concat_length": "合并后总数",
@@ -52,6 +54,7 @@
5254
"select_img": "上传图片",
5355
"source_cronJob": "定时执行",
5456
"stream_output": "流输出",
57+
"to_dataset": "前往知识库",
5558
"unsupported_file_type": "不支持的文件类型",
5659
"upload": "上传",
5760
"view_citations": "查看引用",

packages/web/i18n/zh-CN/common.json

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -501,12 +501,9 @@
501501
"core.dataset.Dataset": "知识库",
502502
"core.dataset.Dataset ID": "知识库 ID",
503503
"core.dataset.Delete Confirm": "确认删除该知识库?删除后数据无法恢复,请确认!",
504-
"core.dataset.Download the parsed content": "下载解析内容",
505504
"core.dataset.Empty Dataset": "空数据集",
506505
"core.dataset.Empty Dataset Tips": "还没有知识库,快去创建一个吧!",
507506
"core.dataset.Folder placeholder": "这是一个目录",
508-
"core.dataset.Get the raw data": "获取源数据",
509-
"core.dataset.Go Dataset": "前往知识库",
510507
"core.dataset.Intro Placeholder": "这个知识库还没有介绍~",
511508
"core.dataset.Manual collection": "手动数据集",
512509
"core.dataset.My Dataset": "我的知识库",
@@ -836,6 +833,11 @@
836833
"core.workflow.variable": "变量",
837834
"create": "去创建",
838835
"cron_job_run_app": "定时任务",
836+
"data_index_custom": "自定义索引",
837+
"data_index_default": "默认索引",
838+
"data_index_image": "图片索引",
839+
"data_index_question": "推测问题索引",
840+
"data_index_summary": "摘要索引",
839841
"dataset.Confirm move the folder": "确认移动到该目录",
840842
"dataset.Confirm to delete the data": "确认删除该数据?",
841843
"dataset.Confirm to delete the file": "确认删除该文件及其所有数据?",

0 commit comments

Comments
 (0)