|
@@ -14,7 +14,7 @@ import PreviewItem, { PreviewType } from './preview-item'
|
|
|
import LanguageSelect from './language-select'
|
|
|
import s from './index.module.css'
|
|
|
import cn from '@/utils/classnames'
|
|
|
-import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
|
|
|
+import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
|
|
|
import {
|
|
|
createDocument,
|
|
|
createFirstDocument,
|
|
@@ -41,8 +41,10 @@ import { IS_CE_EDITION } from '@/config'
|
|
|
import { RETRIEVE_METHOD } from '@/types/app'
|
|
|
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
|
|
|
import Tooltip from '@/app/components/base/tooltip'
|
|
|
-import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
|
|
|
+import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
|
|
|
import { LanguagesSupported } from '@/i18n/language'
|
|
|
+import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
|
|
|
+import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
|
|
|
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
|
|
|
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
|
|
|
|
|
@@ -109,7 +111,7 @@ const StepTwo = ({
|
|
|
const [previewScrolled, setPreviewScrolled] = useState(false)
|
|
|
const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.AUTO)
|
|
|
const [segmentIdentifier, setSegmentIdentifier] = useState('\\n')
|
|
|
- const [max, setMax] = useState(500)
|
|
|
+ const [max, setMax] = useState(5000) // default chunk length
|
|
|
const [overlap, setOverlap] = useState(50)
|
|
|
const [rules, setRules] = useState<PreProcessingRule[]>([])
|
|
|
const [defaultConfig, setDefaultConfig] = useState<Rules>()
|
|
@@ -131,7 +133,6 @@ const StepTwo = ({
|
|
|
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
|
|
|
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
|
|
|
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
|
|
|
- const [estimateTokes, setEstimateTokes] = useState<Pick<IndexingEstimateResponse, 'tokens' | 'total_price'> | null>(null)
|
|
|
|
|
|
const fileIndexingEstimate = (() => {
|
|
|
return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate
|
|
@@ -192,13 +193,10 @@ const StepTwo = ({
|
|
|
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
|
|
|
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
|
|
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!)
|
|
|
- if (segmentationType === SegmentType.CUSTOM) {
|
|
|
+ if (segmentationType === SegmentType.CUSTOM)
|
|
|
setCustomFileIndexingEstimate(res)
|
|
|
- }
|
|
|
- else {
|
|
|
+ else
|
|
|
setAutomaticFileIndexingEstimate(res)
|
|
|
- indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price })
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
const confirmChangeCustomConfig = () => {
|
|
@@ -310,6 +308,19 @@ const StepTwo = ({
|
|
|
defaultModel: rerankDefaultModel,
|
|
|
currentModel: isRerankDefaultModelVaild,
|
|
|
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
|
|
|
+ const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
|
|
|
+ const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
|
|
|
+ const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>(
|
|
|
+ currentDataset?.embedding_model
|
|
|
+ ? {
|
|
|
+ provider: currentDataset.embedding_model_provider,
|
|
|
+ model: currentDataset.embedding_model,
|
|
|
+ }
|
|
|
+ : {
|
|
|
+ provider: defaultEmbeddingModel?.provider.provider || '',
|
|
|
+ model: defaultEmbeddingModel?.model || '',
|
|
|
+ },
|
|
|
+ )
|
|
|
const getCreationParams = () => {
|
|
|
let params
|
|
|
if (segmentationType === SegmentType.CUSTOM && overlap > max) {
|
|
@@ -324,6 +335,8 @@ const StepTwo = ({
|
|
|
process_rule: getProcessRule(),
|
|
|
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
|
|
retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
|
|
|
+ embedding_model: embeddingModel.model, // Readonly
|
|
|
+ embedding_model_provider: embeddingModel.provider, // Readonly
|
|
|
} as CreateDocumentReq
|
|
|
}
|
|
|
else { // create
|
|
@@ -360,6 +373,8 @@ const StepTwo = ({
|
|
|
doc_language: docLanguage,
|
|
|
|
|
|
retrieval_model: postRetrievalConfig,
|
|
|
+ embedding_model: embeddingModel.model,
|
|
|
+ embedding_model_provider: embeddingModel.provider,
|
|
|
} as CreateDocumentReq
|
|
|
if (dataSourceType === DataSourceType.FILE) {
|
|
|
params.data_source.info_list.file_info_list = {
|
|
@@ -613,14 +628,17 @@ const StepTwo = ({
|
|
|
<div className={s.formRow}>
|
|
|
<div className='w-full'>
|
|
|
<div className={s.label}>{t('datasetCreation.stepTwo.maxLength')}</div>
|
|
|
- <input
|
|
|
- type="number"
|
|
|
- className={s.input}
|
|
|
- placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
|
|
|
- value={max}
|
|
|
- min={1}
|
|
|
- onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
|
|
- />
|
|
|
+ <div className='relative w-full'>
|
|
|
+ <input
|
|
|
+ type="number"
|
|
|
+ className={s.input}
|
|
|
+ placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
|
|
|
+ value={max}
|
|
|
+ min={1}
|
|
|
+ onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
|
|
+ />
|
|
|
+ <div className='absolute top-2.5 right-2.5 text-text-tertiary system-sm-regular'>Tokens</div>
|
|
|
+ </div>
|
|
|
</div>
|
|
|
</div>
|
|
|
<div className={s.formRow}>
|
|
@@ -635,14 +653,17 @@ const StepTwo = ({
|
|
|
}
|
|
|
/>
|
|
|
</div>
|
|
|
- <input
|
|
|
- type="number"
|
|
|
- className={s.input}
|
|
|
- placeholder={t('datasetCreation.stepTwo.overlap') || ''}
|
|
|
- value={overlap}
|
|
|
- min={1}
|
|
|
- onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
|
|
- />
|
|
|
+ <div className='relative w-full'>
|
|
|
+ <input
|
|
|
+ type="number"
|
|
|
+ className={s.input}
|
|
|
+ placeholder={t('datasetCreation.stepTwo.overlap') || ''}
|
|
|
+ value={overlap}
|
|
|
+ min={1}
|
|
|
+ onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
|
|
+ />
|
|
|
+ <div className='absolute top-2.5 right-2.5 text-text-tertiary system-sm-regular'>Tokens</div>
|
|
|
+ </div>
|
|
|
</div>
|
|
|
</div>
|
|
|
<div className={s.formRow}>
|
|
@@ -675,7 +696,7 @@ const StepTwo = ({
|
|
|
!isAPIKeySet && s.disabled,
|
|
|
!hasSetIndexType && indexType === IndexingType.QUALIFIED && s.active,
|
|
|
hasSetIndexType && s.disabled,
|
|
|
- hasSetIndexType && '!w-full',
|
|
|
+ hasSetIndexType && '!w-full !min-h-[96px]',
|
|
|
)}
|
|
|
onClick={() => {
|
|
|
if (isAPIKeySet)
|
|
@@ -690,16 +711,6 @@ const StepTwo = ({
|
|
|
{!hasSetIndexType && <span className={s.recommendTag}>{t('datasetCreation.stepTwo.recommend')}</span>}
|
|
|
</div>
|
|
|
<div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div>
|
|
|
- <div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
|
|
|
- {
|
|
|
- estimateTokes
|
|
|
- ? (
|
|
|
- <div className='text-xs font-medium text-gray-800'>{formatNumber(estimateTokes.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(estimateTokes.total_price)}</span>)</div>
|
|
|
- )
|
|
|
- : (
|
|
|
- <div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>
|
|
|
- )
|
|
|
- }
|
|
|
</div>
|
|
|
{!isAPIKeySet && (
|
|
|
<div className={s.warningTip}>
|
|
@@ -717,7 +728,7 @@ const StepTwo = ({
|
|
|
s.indexItem,
|
|
|
!hasSetIndexType && indexType === IndexingType.ECONOMICAL && s.active,
|
|
|
hasSetIndexType && s.disabled,
|
|
|
- hasSetIndexType && '!w-full',
|
|
|
+ hasSetIndexType && '!w-full !min-h-[96px]',
|
|
|
)}
|
|
|
onClick={changeToEconomicalType}
|
|
|
>
|
|
@@ -726,13 +737,11 @@ const StepTwo = ({
|
|
|
<div className={s.typeHeader}>
|
|
|
<div className={s.title}>{t('datasetCreation.stepTwo.economical')}</div>
|
|
|
<div className={s.tip}>{t('datasetCreation.stepTwo.economicalTip')}</div>
|
|
|
- <div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
|
|
|
- <div className='text-xs font-medium text-gray-800'>0 tokens</div>
|
|
|
</div>
|
|
|
</div>
|
|
|
)}
|
|
|
</div>
|
|
|
- {hasSetIndexType && (
|
|
|
+ {hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
|
|
|
<div className='mt-2 text-xs text-gray-500 font-medium'>
|
|
|
{t('datasetCreation.stepTwo.indexSettedTip')}
|
|
|
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
|
|
@@ -767,12 +776,32 @@ const StepTwo = ({
|
|
|
)}
|
|
|
</div>
|
|
|
)}
|
|
|
+ {/* Embedding model */}
|
|
|
+ {indexType === IndexingType.QUALIFIED && (
|
|
|
+ <div className='mb-2'>
|
|
|
+ <div className={cn(s.label, datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
|
|
|
+ <ModelSelector
|
|
|
+ readonly={!!datasetId}
|
|
|
+ defaultModel={embeddingModel}
|
|
|
+ modelList={embeddingModelList}
|
|
|
+ onSelect={(model: DefaultModel) => {
|
|
|
+ setEmbeddingModel(model)
|
|
|
+ }}
|
|
|
+ />
|
|
|
+ {!!datasetId && (
|
|
|
+ <div className='mt-2 text-xs text-gray-500 font-medium'>
|
|
|
+ {t('datasetCreation.stepTwo.indexSettedTip')}
|
|
|
+ <Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
|
|
|
+ </div>
|
|
|
+ )}
|
|
|
+ </div>
|
|
|
+ )}
|
|
|
{/* Retrieval Method Config */}
|
|
|
<div>
|
|
|
{!datasetId
|
|
|
? (
|
|
|
<div className={s.label}>
|
|
|
- {t('datasetSettings.form.retrievalSetting.title')}
|
|
|
+ <div className='shrink-0 mr-4'>{t('datasetSettings.form.retrievalSetting.title')}</div>
|
|
|
<div className='leading-[18px] text-xs font-normal text-gray-500'>
|
|
|
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-6-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
|
|
|
{t('datasetSettings.form.retrievalSetting.longDescription')}
|