Skip to content

Commit bc0ac6d

Browse files
authored
Fix: websync doc and export dataset ux (#1225)
* Revert "lafAccount add pat & re request when token invalid (#76)" (#77) This reverts commit 83d85dfe37adcaef4833385ea52ee79fd84720be. * perf: workflow ux * system config * perf: export data * doc * update doc * fix: whisper
1 parent 78d50e1 commit bc0ac6d

File tree

7 files changed

+131
-21
lines changed

7 files changed

+131
-21
lines changed
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
---
2+
title: 'Web 站点同步'
3+
description: 'FastGPT Web 站点同步功能介绍和使用方式'
4+
icon: 'language'
5+
draft: false
6+
toc: true
7+
weight: 105
8+
---
9+
10+
![](/imgs/webSync1.jpg)
11+
12+
该功能目前仅向商业版用户开放。
13+
14+
## 什么是 Web 站点同步
15+
16+
Web 站点同步利用爬虫技术,可以通过一个入口网址,自动抓取`同域名`下的所有网页,目前最多支持`200`个子页面。出于合规与安全角度,FastGPT 仅支持`静态站点`的爬取,主要用于各个文档站点快速构建知识库。
17+
18+
Tips: 国内的媒体站点基本不可用,例如公众号、CSDN、知乎等。可以通过终端发送`curl`请求检测是否为静态站点,例如:
19+
20+
```bash
21+
curl https://doc.fastgpt.in/docs/intro/
22+
```
23+
24+
## 如何使用
25+
26+
### 1. 新建知识库,选择 Web 站点同步
27+
28+
![](/imgs/webSync2.png)
29+
30+
![](/imgs/webSync3.png)
31+
32+
### 2. 点击配置站点信息
33+
34+
![](/imgs/webSync4.png)
35+
36+
### 3. 填写网址和选择器
37+
38+
![](/imgs/webSync5.jpg)
39+
40+
好了,现在点击开始同步,静待系统自动抓取网站信息即可。
41+
42+
43+
## 创建应用,绑定知识库
44+
45+
![](/imgs/webSync6.webp)
46+
47+
## 选择器如何使用
48+
49+
选择器是 HTML、CSS、JS 中通用的元素定位语法,你可以通过选择器来定位到你需要抓取的具体内容,而不是整个站点。使用方式为:
50+
51+
### 首先打开浏览器调试面板(通常是 F12,或者【右键 - 检查】)
52+
53+
![](/imgs/webSync7.webp)
54+
55+
![](/imgs/webSync8.webp)
56+
57+
### 输入对应元素的选择器
58+
59+
[菜鸟教程 css 选择器](https://www.runoob.com/cssref/css-selectors.html),具体选择器的使用方式可以参考菜鸟教程。
60+
61+
上图中,我们选中了一个区域,对应的是`div`标签,它有 `data-prismjs-copy`, `data-prismjs-copy-success`, `data-prismjs-copy-error` 三个属性,这里我们用到一个就够。所以选择器是:
62+
**`div[data-prismjs-copy]`**
63+
64+
除了属性选择器,常见的还有类和ID选择器。例如:
65+
66+
![](/imgs/webSync9.webp)
67+
68+
上图 class 里的是类名(可能包含多个类名,都是空格隔开的,选择一个即可),选择器可以为:**`.docs-content`**
69+
70+
### 多选择器使用
71+
72+
在开头的演示中,我们对 FastGPT 文档是使用了多选择器的方式来选择,通过逗号隔开了两个选择器。
73+
74+
![](/imgs/webSync10.webp)
75+
76+
我们希望选中上图两个标签中的内容,此时就需要两组选择器。一组是:`.docs-content .mb-0.d-flex`,含义是 `docs-content` 类下同时包含 `mb-0`、`d-flex` 两个类的后代元素;
77+
78+
另一组是`.docs-content div[data-prismjs-copy]`,含义是`docs-content` 类下包含`data-prismjs-copy`属性的`div`元素。
79+
80+
把两组选择器用逗号隔开即可:`.docs-content .mb-0.d-flex, .docs-content div[data-prismjs-copy]`

packages/global/core/module/template/system/userGuide.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ export const UserGuideModule: FlowNodeTemplateType = {
4646
label: '',
4747
showTargetInApp: false,
4848
showTargetInPlugin: false
49+
},
50+
{
51+
key: ModuleInputKeyEnum.whisper,
52+
type: FlowNodeInputTypeEnum.hidden,
53+
valueType: ModuleIOValueTypeEnum.any,
54+
label: '',
55+
showTargetInApp: false,
56+
showTargetInPlugin: false
4957
}
5058
],
5159
outputs: []

projects/app/public/locales/en/common.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@
566566
"Set Empty Result Tip": ",Response empty text",
567567
"Set Website Config": "Configuring Website",
568568
"Similarity": "Similarity",
569+
"Start export": "Export started",
569570
"Sync Time": "Update Time",
570571
"Table collection": "Table collection",
571572
"Text collection": "Text collection",
@@ -965,6 +966,7 @@
965966
"AI support tool tip": "A model that supports function calls allows better use of tool calls.",
966967
"Ai chat": "LLM Chat",
967968
"Ai chat intro": "Request LLM chat",
969+
"App system setting": "System config",
968970
"Assigned reply": "Assigned reply",
969971
"Assigned reply intro": "The module can respond directly to a specified piece of content. Often used to guide and prompt. When non-string content is passed in, it is converted to a string for output.",
970972
"Basic Node": "Basic Node",

projects/app/public/locales/zh/common.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@
566566
"Set Empty Result Tip": ",未搜索到内容时回复指定内容",
567567
"Set Website Config": "开始配置网站信息",
568568
"Similarity": "相关度",
569+
"Start export": "已开始导出",
569570
"Sync Time": "最后更新时间",
570571
"Table collection": "表格数据集",
571572
"Text collection": "文本数据集",
@@ -610,7 +611,8 @@
610611
"success": "开始同步"
611612
}
612613
},
613-
"training": {}
614+
"training": {
615+
}
614616
},
615617
"data": {
616618
"Auxiliary Data": "辅助数据",
@@ -966,6 +968,7 @@
966968
"AI support tool tip": "支持函数调用的模型,可以更好的使用工具调用。",
967969
"Ai chat": "AI 对话",
968970
"Ai chat intro": "AI 大模型对话",
971+
"App system setting": "系统配置",
969972
"Assigned reply": "指定回复",
970973
"Assigned reply intro": "该模块可以直接回复一段指定的内容。常用于引导、提示。非字符串内容传入时,会转成字符串进行输出。",
971974
"Basic Node": "基础功能",
@@ -997,7 +1000,6 @@
9971000
"Tool module": "工具",
9981001
"UnKnow Module": "未知模块",
9991002
"User guide": "用户引导",
1000-
"App system setting": "系统配置",
10011003
"http body placeholder": "与APIFox相同的语法",
10021004
"textEditor": "文本加工",
10031005
"textEditor intro": "可对固定或传入的文本进行加工后输出,非字符串类型数据最终会转成字符串类型。"

projects/app/src/pages/api/core/dataset/exportAll.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,15 @@ export default withNextCors(async function handler(req: NextApiRequest, res: Nex
7171
cursor.on('end', () => {
7272
cursor.close();
7373
res.end();
74-
updateExportDatasetLimit(teamId);
7574
});
7675

7776
cursor.on('error', (err) => {
7877
addLog.error(`export dataset error`, err);
7978
res.status(500);
8079
res.end();
8180
});
81+
82+
updateExportDatasetLimit(teamId);
8283
} catch (err) {
8384
res.status(500);
8485
addLog.error(`export dataset error`, err);

projects/app/src/pages/dataset/list/index.tsx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,17 @@ const Kb = () => {
9292
setLoading(true);
9393
await checkTeamExportDatasetLimit(dataset._id);
9494

95-
xmlDownloadFetch({
95+
await xmlDownloadFetch({
9696
url: `/api/core/dataset/exportAll?datasetId=${dataset._id}`,
9797
filename: `${dataset.name}.csv`
9898
});
9999
},
100+
onSuccess() {
101+
toast({
102+
status: 'success',
103+
title: t('core.dataset.Start export')
104+
});
105+
},
100106
onSettled() {
101107
setLoading(false);
102108
},
Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,31 @@
11
import { getToken } from '@/web/support/user/auth';
2+
import { hasHttps } from '@fastgpt/web/common/system/utils';
23

3-
export const xmlDownloadFetch = ({ url, filename }: { url: string; filename: string }) => {
4-
const xhr = new XMLHttpRequest();
5-
xhr.open('GET', url, true);
6-
xhr.setRequestHeader('token', getToken());
7-
xhr.responseType = 'blob';
8-
xhr.onload = function (e) {
9-
if (this.status == 200) {
10-
const blob = this.response;
11-
const a = document.createElement('a');
12-
const url = URL.createObjectURL(blob);
13-
a.href = url;
14-
a.download = filename;
15-
a.click();
16-
window.URL.revokeObjectURL(url);
17-
}
18-
};
19-
xhr.send();
4+
/**
 * Trigger a browser download of `url`, saved under `filename`.
 *
 * Two paths, selected by `hasHttps()`:
 * - truthy: a temporary `<a download>` pointing directly at `url` is clicked,
 *   so the browser performs the request itself (no `token` header is attached
 *   on this path).
 * - falsy: the resource is fetched with the `token` header from `getToken()`,
 *   read into a Blob, and downloaded through a temporary object URL.
 *
 * @param url - Address of the resource to download.
 * @param filename - Name assigned to the anchor's `download` attribute.
 * @throws Error when the fetch path receives a non-ok response.
 *
 * NOTE(review): presumably `hasHttps()` reports whether the page is served
 * over HTTPS — confirm against its definition in `@fastgpt/web`.
 */
export const xmlDownloadFetch = async ({ url, filename }: { url: string; filename: string }) => {
5+
if (hasHttps()) {
6+
const a = document.createElement('a');
7+
a.href = url;
8+
a.download = filename;
9+
document.body.appendChild(a);
10+
a.click();
11+
document.body.removeChild(a);
12+
} else {
13+
const response = await fetch(url, {
14+
headers: {
15+
token: `${getToken()}`
16+
}
17+
});
18+
if (!response.ok) throw new Error('Network response was not ok.');
19+
20+
const blob = await response.blob();
21+
const downloadUrl = window.URL.createObjectURL(blob);
22+
const a = document.createElement('a');
23+
a.style.display = 'none'; // hide the <a> element
24+
a.href = downloadUrl;
25+
a.download = filename;
26+
document.body.appendChild(a);
27+
a.click(); // simulate a user click
28+
document.body.removeChild(a);
29+
window.URL.revokeObjectURL(downloadUrl); // release the generated object URL
30+
}
2031
};

0 commit comments

Comments
 (0)