{t('common:common.Name')} |
- {t('dataset:collection.Training type')} |
+ {t('dataset:collection.training_type')} |
{t('dataset:collection_data_count')} |
{t('dataset:collection.Create update time')} |
{t('common:common.Status')} |
@@ -251,7 +249,14 @@ const CollectionCard = () => {
{!checkCollectionIsFolder(collection.type) ? (
- <>{t((getTrainingTypeLabel(collection.trainingType) || '-') as any)}>
+ <>
+ {collection.trainingType
+ ? t(
+ (DatasetCollectionDataProcessModeMap[collection.trainingType]
+ ?.label || '-') as any
+ )
+ : '-'}
+ >
) : (
'-'
)}
diff --git a/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx b/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx
index 52eacd9bfae8..853efddecd3c 100644
--- a/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/Import/Context.tsx
@@ -1,13 +1,16 @@
import { useRouter } from 'next/router';
-import { SetStateAction, useState } from 'react';
+import { SetStateAction, useMemo, useState } from 'react';
import { useTranslation } from 'next-i18next';
import { createContext, useContextSelector } from 'use-context-selector';
-import { ImportDataSourceEnum, TrainingModeEnum } from '@fastgpt/global/core/dataset/constants';
+import {
+ DatasetCollectionDataProcessModeEnum,
+ ImportDataSourceEnum
+} from '@fastgpt/global/core/dataset/constants';
import { useMyStep } from '@fastgpt/web/hooks/useStep';
import { Box, Button, Flex, IconButton } from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { TabEnum } from '../NavBar';
-import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
+import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import { UseFormReturn, useForm } from 'react-hook-form';
import { ImportSourceItemType } from '@/web/core/dataset/type';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
@@ -19,12 +22,10 @@ type TrainingFiledType = {
minChunkSize: number;
autoChunkSize: number;
chunkSize: number;
- showChunkInput: boolean;
- showPromptInput: boolean;
charsPointsPrice: number;
priceTip: string;
uploadRate: number;
- chunkSizeField?: ChunkSizeFieldType;
+ chunkSizeField: ChunkSizeFieldType;
};
type DatasetImportContextType = {
importSource: ImportDataSourceEnum;
@@ -39,8 +40,13 @@ type DatasetImportContextType = {
type ChunkSizeFieldType = 'embeddingChunkSize' | 'qaChunkSize';
export type ImportFormType = {
- mode: TrainingModeEnum;
- way: ImportProcessWayEnum;
+ customPdfParse: boolean;
+
+ trainingType: DatasetCollectionDataProcessModeEnum;
+ imageIndex: boolean;
+ autoIndexes: boolean;
+
+ chunkSettingMode: ChunkSettingModeEnum;
embeddingChunkSize: number;
qaChunkSize: number;
customSplitChar: string;
@@ -58,8 +64,6 @@ export const DatasetImportContext = createContext({
maxChunkSize: 0,
minChunkSize: 0,
- showChunkInput: false,
- showPromptInput: false,
sources: [],
setSources: function (value: SetStateAction): void {
throw new Error('Function not implemented.');
@@ -88,72 +92,93 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
const modeSteps: Record = {
[ImportDataSourceEnum.reTraining]: [
{ title: t('dataset:core.dataset.import.Adjust parameters') },
- { title: t('common:core.dataset.import.Upload data') }
+ {
+ title: t('dataset:import_data_preview')
+ },
+ { title: t('dataset:import_confirm') }
],
[ImportDataSourceEnum.fileLocal]: [
{
- title: t('common:core.dataset.import.Select file')
+ title: t('dataset:import_select_file')
+ },
+ {
+ title: t('dataset:import_param_setting')
},
{
- title: t('common:core.dataset.import.Data Preprocessing')
+ title: t('dataset:import_data_preview')
},
{
- title: t('common:core.dataset.import.Upload data')
+ title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.fileLink]: [
{
- title: t('common:core.dataset.import.Select file')
+ title: t('dataset:import_select_file')
+ },
+ {
+ title: t('dataset:import_param_setting')
},
{
- title: t('common:core.dataset.import.Data Preprocessing')
+ title: t('dataset:import_data_preview')
},
{
- title: t('common:core.dataset.import.Upload data')
+ title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.fileCustom]: [
{
- title: t('common:core.dataset.import.Select file')
+ title: t('dataset:import_select_file')
+ },
+ {
+ title: t('dataset:import_param_setting')
},
{
- title: t('common:core.dataset.import.Data Preprocessing')
+ title: t('dataset:import_data_preview')
},
{
- title: t('common:core.dataset.import.Upload data')
+ title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.csvTable]: [
{
- title: t('common:core.dataset.import.Select file')
+ title: t('dataset:import_select_file')
},
{
- title: t('common:core.dataset.import.Data Preprocessing')
+ title: t('dataset:import_param_setting')
},
{
- title: t('common:core.dataset.import.Upload data')
+ title: t('dataset:import_data_preview')
+ },
+ {
+ title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.externalFile]: [
{
- title: t('common:core.dataset.import.Select file')
+ title: t('dataset:import_select_file')
},
{
- title: t('common:core.dataset.import.Data Preprocessing')
+ title: t('dataset:import_param_setting')
},
{
- title: t('common:core.dataset.import.Upload data')
+ title: t('dataset:import_data_preview')
+ },
+ {
+ title: t('dataset:import_confirm')
}
],
[ImportDataSourceEnum.apiDataset]: [
{
- title: t('common:core.dataset.import.Select file')
+ title: t('dataset:import_select_file')
+ },
+ {
+ title: t('dataset:import_param_setting')
},
{
- title: t('common:core.dataset.import.Data Preprocessing')
+ title: t('dataset:import_data_preview')
},
{
- title: t('common:core.dataset.import.Upload data')
+ title: t('dataset:import_confirm')
}
]
};
@@ -168,96 +193,114 @@ const DatasetImportContextProvider = ({ children }: { children: React.ReactNode
const processParamsForm = useForm({
defaultValues: {
- mode: TrainingModeEnum.chunk,
- way: ImportProcessWayEnum.auto,
+ imageIndex: false,
+ autoIndexes: false,
+
+ trainingType: DatasetCollectionDataProcessModeEnum.chunk,
+
+ chunkSettingMode: ChunkSettingModeEnum.auto,
embeddingChunkSize: vectorModel?.defaultToken || 512,
qaChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
customSplitChar: '',
qaPrompt: Prompt_AgentQA.description,
- webSelector: ''
+ webSelector: '',
+ customPdfParse: false
}
});
const [sources, setSources] = useState([]);
// watch form
- const mode = processParamsForm.watch('mode');
- const way = processParamsForm.watch('way');
+ const trainingType = processParamsForm.watch('trainingType');
+ const chunkSettingMode = processParamsForm.watch('chunkSettingMode');
const embeddingChunkSize = processParamsForm.watch('embeddingChunkSize');
const qaChunkSize = processParamsForm.watch('qaChunkSize');
const customSplitChar = processParamsForm.watch('customSplitChar');
+ const autoIndexes = processParamsForm.watch('autoIndexes');
- const modeStaticParams: Record = {
- [TrainingModeEnum.auto]: {
- chunkOverlapRatio: 0.2,
- maxChunkSize: 2048,
- minChunkSize: 100,
- autoChunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
- chunkSize: vectorModel?.defaultToken ? vectorModel?.defaultToken * 2 : 1024,
- showChunkInput: false,
- showPromptInput: false,
- charsPointsPrice: agentModel.charsPointsPrice || 0,
- priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
- price: agentModel.charsPointsPrice
- }),
- uploadRate: 100
- },
- [TrainingModeEnum.chunk]: {
- chunkSizeField: 'embeddingChunkSize' as ChunkSizeFieldType,
- chunkOverlapRatio: 0.2,
- maxChunkSize: vectorModel?.maxToken || 512,
- minChunkSize: 100,
- autoChunkSize: vectorModel?.defaultToken || 512,
- chunkSize: embeddingChunkSize,
- showChunkInput: true,
- showPromptInput: false,
- charsPointsPrice: vectorModel.charsPointsPrice || 0,
- priceTip: t('dataset:import.Embedding Estimated Price Tips', {
- price: vectorModel.charsPointsPrice
- }),
- uploadRate: 150
- },
- [TrainingModeEnum.qa]: {
- chunkSizeField: 'qaChunkSize' as ChunkSizeFieldType,
- chunkOverlapRatio: 0,
- maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
- minChunkSize: 4000,
- autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
- chunkSize: qaChunkSize,
- showChunkInput: true,
- showPromptInput: true,
- charsPointsPrice: agentModel.charsPointsPrice || 0,
- priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
- price: agentModel.charsPointsPrice
- }),
- uploadRate: 30
+ const TrainingModeMap = useMemo(() => { // Chunking limits and pricing for the currently selected training mode
+ if (trainingType === DatasetCollectionDataProcessModeEnum.qa) { // QA: limits and price derive from the agent model
+ return {
+ chunkSizeField: 'qaChunkSize',
+ chunkOverlapRatio: 0,
+ maxChunkSize: Math.min(agentModel.maxResponse * 4, agentModel.maxContext * 0.7),
+ minChunkSize: 4000,
+ autoChunkSize: Math.min(agentModel.maxResponse * 1, agentModel.maxContext * 0.7),
+ chunkSize: qaChunkSize,
+ charsPointsPrice: agentModel.charsPointsPrice || 0,
+ priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
+ price: agentModel.charsPointsPrice
+ }),
+ uploadRate: 30
+ };
+ } else if (autoIndexes) { // Chunk mode with auto indexes: fixed 2048 max, priced by the agent model
+ return {
+ chunkSizeField: 'embeddingChunkSize',
+ chunkOverlapRatio: 0.2,
+ maxChunkSize: 2048,
+ minChunkSize: 100,
+ autoChunkSize: vectorModel?.defaultToken ? vectorModel.defaultToken * 2 : 1024,
+ chunkSize: embeddingChunkSize,
+ charsPointsPrice: agentModel.charsPointsPrice || 0,
+ priceTip: t('dataset:import.Auto mode Estimated Price Tips', {
+ price: agentModel.charsPointsPrice
+ }),
+ uploadRate: 100
+ };
+ } else { // Plain chunk mode: limits and price come from the vector model
+ return {
+ chunkSizeField: 'embeddingChunkSize',
+ chunkOverlapRatio: 0.2,
+ maxChunkSize: vectorModel?.maxToken || 512,
+ minChunkSize: 100,
+ autoChunkSize: vectorModel?.defaultToken || 512,
+ chunkSize: embeddingChunkSize,
+ charsPointsPrice: vectorModel?.charsPointsPrice || 0, // guarded like the other vectorModel reads
+ priceTip: t('dataset:import.Embedding Estimated Price Tips', {
+ price: vectorModel?.charsPointsPrice
+ }),
+ uploadRate: 150
+ };
}
- };
- const selectModelStaticParam = modeStaticParams[mode];
+ }, [
+ trainingType,
+ autoIndexes,
+ agentModel.maxResponse,
+ agentModel.maxContext,
+ agentModel.charsPointsPrice,
+ qaChunkSize,
+ t,
+ vectorModel?.defaultToken, // optional reads here too: the deps array is evaluated on every render
+ vectorModel?.maxToken,
+ vectorModel?.charsPointsPrice,
+ embeddingChunkSize
+ ]);
- const wayStaticPrams = {
- [ImportProcessWayEnum.auto]: {
- chunkSize: selectModelStaticParam.autoChunkSize,
- customSplitChar: ''
- },
- [ImportProcessWayEnum.custom]: {
- chunkSize: modeStaticParams[mode].chunkSize,
- customSplitChar
+ const chunkSettingModeMap = useMemo(() => {
+ if (chunkSettingMode === ChunkSettingModeEnum.auto) {
+ return {
+ chunkSize: TrainingModeMap.autoChunkSize,
+ customSplitChar: ''
+ };
+ } else {
+ return {
+ chunkSize: TrainingModeMap.chunkSize,
+ customSplitChar
+ };
}
- };
- const chunkSize = wayStaticPrams[way].chunkSize;
+ }, [chunkSettingMode, TrainingModeMap.autoChunkSize, TrainingModeMap.chunkSize, customSplitChar]);
const contextValue = {
+ ...TrainingModeMap,
+ ...chunkSettingModeMap,
importSource: source,
parentId,
activeStep,
goToNext,
processParamsForm,
- ...selectModelStaticParam,
sources,
- setSources,
- chunkSize
+ setSources
};
return (
diff --git a/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/DataProcess.tsx b/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/DataProcess.tsx
index c5e30ed49086..6daae5d73ec7 100644
--- a/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/DataProcess.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/DataProcess.tsx
@@ -1,4 +1,4 @@
-import React, { useCallback, useMemo, useRef } from 'react';
+import React, { useCallback, useEffect, useMemo, useRef } from 'react';
import {
Box,
Flex,
@@ -7,45 +7,48 @@ import {
ModalBody,
ModalFooter,
Textarea,
- useDisclosure
+ useDisclosure,
+ Checkbox,
+ Accordion,
+ AccordionItem,
+ AccordionButton,
+ AccordionPanel,
+ AccordionIcon,
+ HStack
} from '@chakra-ui/react';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useTranslation } from 'next-i18next';
import LeftRadio from '@fastgpt/web/components/common/Radio/LeftRadio';
-import { TrainingModeEnum, TrainingTypeMap } from '@fastgpt/global/core/dataset/constants';
-import { ImportProcessWayEnum } from '@/web/core/dataset/constants';
+import {
+ DatasetCollectionDataProcessModeEnum,
+ DatasetCollectionDataProcessModeMap
+} from '@fastgpt/global/core/dataset/constants';
+import { ChunkSettingModeEnum } from '@/web/core/dataset/constants';
import MyTooltip from '@fastgpt/web/components/common/MyTooltip';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyModal from '@fastgpt/web/components/common/MyModal';
import { Prompt_AgentQA } from '@fastgpt/global/core/ai/prompt/agent';
-import Preview from '../components/Preview';
import MyTag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
-import { useToast } from '@fastgpt/web/hooks/useToast';
import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
import MyNumberInput from '@fastgpt/web/components/common/Input/NumberInput';
import QuestionTip from '@fastgpt/web/components/common/MyTooltip/QuestionTip';
+import { shadowLight } from '@fastgpt/web/styles/theme';
+import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
+import { useToast } from '@fastgpt/web/hooks/useToast';
-function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean }) {
+function DataProcess() {
const { t } = useTranslation();
const { feConfigs } = useSystemStore();
+ const { toast } = useToast();
- const {
- goToNext,
- processParamsForm,
- chunkSizeField,
- minChunkSize,
- showChunkInput,
- showPromptInput,
- maxChunkSize,
- priceTip,
- chunkSize
- } = useContextSelector(DatasetImportContext, (v) => v);
+ const { goToNext, processParamsForm, chunkSizeField, minChunkSize, maxChunkSize } =
+ useContextSelector(DatasetImportContext, (v) => v);
+ const datasetDetail = useContextSelector(DatasetPageContext, (v) => v.datasetDetail);
const { getValues, setValue, register, watch } = processParamsForm;
- const { toast } = useToast();
- const mode = watch('mode');
- const way = watch('way');
+ const trainingType = watch('trainingType');
+ const chunkSettingMode = watch('chunkSettingMode');
const {
isOpen: isOpenCustomPrompt,
@@ -54,214 +57,317 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
} = useDisclosure();
const trainingModeList = useMemo(() => {
- const list = Object.entries(TrainingTypeMap);
- return list;
+ const list = Object.entries(DatasetCollectionDataProcessModeMap); // one radio option per processing mode
+ return list
+ .filter(([key]) => key !== DatasetCollectionDataProcessModeEnum.auto) // `auto` is not offered as a choice
+ .map(([key, value]) => ({
+ title: t(value.label as any),
+ value: key as DatasetCollectionDataProcessModeEnum,
+ tooltip: t(value.tooltip as any)
+ }));
- }, []);
+ }, [t]); // depend on `t` so titles/tooltips are rebuilt when the translation function changes
- const onSelectTrainWay = useCallback(
- (e: TrainingModeEnum) => {
- if (!feConfigs?.isPlus && !TrainingTypeMap[e]?.openSource) {
- return toast({
- status: 'warning',
- title: t('common:common.system.Commercial version function')
- });
- }
- setValue('mode', e);
- },
- [feConfigs?.isPlus, setValue, t, toast]
- );
-
- return (
-
-
-
-
- {t('dataset:data_process_setting')}
-
-
-
- {t('dataset:training_mode')}
- ({
- title: t(value.label as any),
- value: key,
- tooltip: t(value.tooltip as any)
- }))}
- px={3}
- py={2}
- value={mode}
- onChange={onSelectTrainWay}
- defaultBg="white"
- activeBg="white"
- display={'flex'}
- flexWrap={'wrap'}
- />
+ const Title = useCallback(({ title }: { title: string }) => {
+ return (
+
+
+
+ {title}
+
+
+ );
+ }, []);
-
- {t('dataset:data_process_params')}
-
- {showChunkInput && chunkSizeField && (
-
-
- {t('dataset:ideal_chunk_length')}
-
-
- span': {
- display: 'block'
- }
- }}
- >
-
- {
- if (e === undefined) return;
- setValue(chunkSizeField, +e);
- }}
- />
-
-
-
- )}
+ // Adapt auto training: a form value of `auto` is rewritten to `chunk` with `autoIndexes` turned on
+ useEffect(() => {
+ if (trainingType === DatasetCollectionDataProcessModeEnum.auto) {
+ setValue('autoIndexes', true);
+ setValue('trainingType', DatasetCollectionDataProcessModeEnum.chunk);
+ }
+ }, [trainingType, setValue]);
-
-
- {t('common:core.dataset.import.Custom split char')}
-
-
-
-
-
-
+ const showFileParseSetting = feConfigs?.showCustomPdfParse;
+ const showQAPromptInput = trainingType === DatasetCollectionDataProcessModeEnum.qa;
- {showPromptInput && (
-
- {t('common:core.dataset.collection.QA Prompt')}
-
+
+
+ {showFileParseSetting && (
+
+
+
+
+
+ {feConfigs.showCustomPdfParse && (
+
+
+ {t('dataset:pdf_enhance_parse')}
+
+
+ {feConfigs?.show_pay && (
+
- {getValues('qaPrompt')}
+ {t('dataset:pdf_enhance_parse_price', {
+ price: feConfigs.customPdfParsePrice || 0
+ })}
+
+ )}
+
+ )}
+
+
+
+ )}
-
- }
- color={'black'}
- position={'absolute'}
- right={2}
- bottom={2}
- onClick={onOpenCustomPrompt}
+
+
+
+
+
+
+ {t('dataset:training_mode')}
+
+
+ list={trainingModeList}
+ px={3}
+ py={2.5}
+ value={trainingType}
+ onChange={(e) => {
+ setValue('trainingType', e);
+ }}
+ defaultBg="white"
+ activeBg="white"
+ gridTemplateColumns={'repeat(2, 1fr)'}
+ />
+
+ {trainingType === DatasetCollectionDataProcessModeEnum.chunk && feConfigs?.isPlus && (
+
+
+ {t('dataset:enhanced_indexes')}
+
+
+
+
+ {t('dataset:auto_indexes')}
+
+
+
+
+
+
+ {t('dataset:image_auto_parse')}
+
+
+
+
+
+
+ )}
+
+
+ {t('dataset:params_setting')}
+
+
+ list={[
+ {
+ title: t('dataset:default_params'),
+ desc: t('dataset:default_params_desc'),
+ value: ChunkSettingModeEnum.auto
+ },
+ {
+ title: t('dataset:custom_data_process_params'),
+ desc: t('dataset:custom_data_process_params_desc'),
+ value: ChunkSettingModeEnum.custom,
+ children: chunkSettingMode === ChunkSettingModeEnum.custom && (
+
+
+
+ {t('dataset:ideal_chunk_length')}
+
+
+ span': {
+ display: 'block'
+ }
+ }}
>
- {t('common:core.dataset.import.Custom prompt')}
-
+
+
+
+
+
+
+
+ {t('common:core.dataset.import.Custom split char')}
+
+
+
+
+
+
+
+ {showQAPromptInput && (
+
+ {t('common:core.dataset.collection.QA Prompt')}
+
+ {getValues('qaPrompt')}
+
+
+ }
+ color={'black'}
+ position={'absolute'}
+ right={2}
+ bottom={2}
+ onClick={onOpenCustomPrompt}
+ >
+ {t('common:core.dataset.import.Custom prompt')}
+
+
+
+
+ )}
-
- )}
-
- )
- }
- ]}
- px={3}
- py={3}
- defaultBg="white"
- activeBg="white"
- value={way}
- w={'100%'}
- onChange={(e) => {
- setValue('way', e);
- }}
- >
-
+ )
+ }
+ ]}
+ gridGap={3}
+ px={3}
+ py={3}
+ defaultBg="white"
+ activeBg="white"
+ value={chunkSettingMode}
+ w={'100%'}
+ onChange={(e) => {
+ setValue('chunkSettingMode', e);
+ }}
+ />
+
+
+
- {feConfigs?.show_pay && (
-
-
- {priceTip}
-
-
- )}
+ {/*
+
+
+
+ {t('common:core.ai.model.Dataset Agent Model')}
+
+ ({
+ label: item.name,
+ value: item.model
+ }))}
+ onchange={(e) => {
+ setValue('llmModel', e);
+ }}
+ />
+
+
+
+ {t('dataset:vllm_model')}
+
+ ({
+ label: item.name,
+ value: item.model
+ }))}
+ onchange={(e) => {
+ setValue('vlmModel', e);
+ }}
+ />
+
+
+
+ */}
-
-
-
-
-
-
+
+
+
+
{isOpenCustomPrompt && (
@@ -273,7 +379,7 @@ function DataProcess({ showPreviewChunks = true }: { showPreviewChunks: boolean
onClose={onCloseCustomPrompt}
/>
)}
-
+ >
);
}
diff --git a/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/PreviewData.tsx b/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/PreviewData.tsx
index 892ffc25f4da..1b2ce5c23f2d 100644
--- a/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/PreviewData.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/PreviewData.tsx
@@ -1,19 +1,162 @@
-import React from 'react';
-import Preview from '../components/Preview';
-import { Box, Button, Flex } from '@chakra-ui/react';
+import React, { useState } from 'react';
+import { Box, Button, Flex, HStack } from '@chakra-ui/react';
import { useTranslation } from 'next-i18next';
import { useContextSelector } from 'use-context-selector';
import { DatasetImportContext } from '../Context';
+import MyIcon from '@fastgpt/web/components/common/Icon';
+import FormLabel from '@fastgpt/web/components/common/MyBox/FormLabel';
+import EmptyTip from '@fastgpt/web/components/common/EmptyTip';
+import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
+import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
+import { splitText2Chunks } from '@fastgpt/global/common/string/textSplitter';
+import { getPreviewChunks } from '@/web/core/dataset/api';
+import { ImportSourceItemType } from '@/web/core/dataset/type';
+import { getPreviewSourceReadType } from '../utils';
+import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
+import MyBox from '@fastgpt/web/components/common/MyBox';
+import Markdown from '@/components/Markdown';
+import { useToast } from '@fastgpt/web/hooks/useToast';
-const PreviewData = ({ showPreviewChunks }: { showPreviewChunks: boolean }) => {
+const PreviewData = () => {
const { t } = useTranslation();
+ const { toast } = useToast();
const goToNext = useContextSelector(DatasetImportContext, (v) => v.goToNext);
+ const datasetId = useContextSelector(DatasetPageContext, (v) => v.datasetId);
+
+ const sources = useContextSelector(DatasetImportContext, (v) => v.sources);
+ const importSource = useContextSelector(DatasetImportContext, (v) => v.importSource);
+ const chunkSize = useContextSelector(DatasetImportContext, (v) => v.chunkSize);
+ const chunkOverlapRatio = useContextSelector(DatasetImportContext, (v) => v.chunkOverlapRatio);
+ const processParamsForm = useContextSelector(DatasetImportContext, (v) => v.processParamsForm);
+
+ const [previewFile, setPreviewFile] = useState();
+
+ const { data = [], loading: isLoading } = useRequest2(
+ async () => {
+ if (!previewFile) return;
+ if (importSource === ImportDataSourceEnum.fileCustom) {
+ const customSplitChar = processParamsForm.getValues('customSplitChar');
+ const { chunks } = splitText2Chunks({
+ text: previewFile.rawText || '',
+ chunkLen: chunkSize,
+ overlapRatio: chunkOverlapRatio,
+ customReg: customSplitChar ? [customSplitChar] : []
+ });
+ return chunks.map((chunk) => ({
+ q: chunk,
+ a: ''
+ }));
+ }
+
+ return getPreviewChunks({
+ datasetId,
+ type: getPreviewSourceReadType(previewFile),
+ sourceId:
+ previewFile.dbFileId ||
+ previewFile.link ||
+ previewFile.externalFileUrl ||
+ previewFile.apiFileId ||
+ '',
+
+ customPdfParse: processParamsForm.getValues('customPdfParse'),
+
+ chunkSize,
+ overlapRatio: chunkOverlapRatio,
+ customSplitChar: processParamsForm.getValues('customSplitChar'),
+
+ selector: processParamsForm.getValues('webSelector'),
+ isQAImport: importSource === ImportDataSourceEnum.csvTable,
+ externalFileId: previewFile.externalFileId
+ });
+ },
+ {
+ refreshDeps: [previewFile],
+ manual: false,
+ onSuccess(result) {
+ if (!previewFile) return;
+ if (!result || result.length === 0) {
+ toast({
+ title: t('dataset:preview_chunk_empty'),
+ status: 'error'
+ });
+ }
+ }
+ }
+ );
+
return (
-
-
-
+
+
+
+ {t('dataset:file_list')}
+
+
+ {sources.map((source) => (
+ setPreviewFile(source)}
+ >
+
+
+ {source.sourceName}
+
+
+ ))}
+
+
+
+
+ {t('dataset:preview_chunk')}
+
+ {t('dataset:preview_chunk_intro')}
+
+
+
+
+ {previewFile ? (
+ <>
+ {data.map((item, index) => (
+
+
+
+
+ ))}
+ >
+ ) : (
+
+ )}
+
+
+
+
diff --git a/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/Upload.tsx b/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/Upload.tsx
index e811a52540dd..489f9c0f21b7 100644
--- a/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/Upload.tsx
+++ b/projects/app/src/pageComponents/dataset/detail/Import/commonProgress/Upload.tsx
@@ -14,7 +14,10 @@ import {
IconButton,
Tooltip
} from '@chakra-ui/react';
-import { ImportDataSourceEnum } from '@fastgpt/global/core/dataset/constants';
+import {
+ DatasetCollectionDataProcessModeEnum,
+ ImportDataSourceEnum
+} from '@fastgpt/global/core/dataset/constants';
import { useTranslation } from 'next-i18next';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
@@ -34,6 +37,7 @@ import MyTag from '@fastgpt/web/components/common/Tag/index';
import { useContextSelector } from 'use-context-selector';
import { DatasetPageContext } from '@/web/core/dataset/context/datasetPageContext';
import { DatasetImportContext, type ImportFormType } from '../Context';
+import { ApiCreateDatasetCollectionParams } from '@fastgpt/global/core/dataset/api.d';
const Upload = () => {
const { t } = useTranslation();
@@ -77,7 +81,7 @@ const Upload = () => {
}, [waitingFilesCount, totalFilesCount, allFinished, t]);
const { runAsync: startUpload, loading: isLoading } = useRequest2(
- async ({ mode, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
+ async ({ trainingType, customSplitChar, qaPrompt, webSelector }: ImportFormType) => {
if (sources.length === 0) return;
const filterWaitingSources = sources.filter((item) => item.createStatus === 'waiting');
@@ -95,15 +99,21 @@ const Upload = () => {
);
// create collection
- const commonParams = {
+ const commonParams: ApiCreateDatasetCollectionParams & {
+ name: string;
+ } = {
parentId,
- trainingType: mode,
datasetId: datasetDetail._id,
+ name: item.sourceName,
+
+ customPdfParse: processParamsForm.getValues('customPdfParse'),
+
+ trainingType,
+ imageIndex: processParamsForm.getValues('imageIndex'),
+ autoIndexes: processParamsForm.getValues('autoIndexes'),
chunkSize,
chunkSplitter: customSplitChar,
- qaPrompt,
-
- name: item.sourceName
+ qaPrompt: trainingType === DatasetCollectionDataProcessModeEnum.qa ? qaPrompt : undefined
};
if (importSource === ImportDataSourceEnum.reTraining) {
const res = await postReTrainingDatasetFileCollection({
@@ -272,7 +282,7 @@ const Upload = () => {
|