|
@@ -1,4 +1,3 @@
|
|
|
-/* eslint-disable no-mixed-operators */
|
|
|
'use client'
|
|
|
import React, { useEffect, useLayoutEffect, useRef, useState } from 'react'
|
|
|
import { useTranslation } from 'react-i18next'
|
|
@@ -11,7 +10,7 @@ import { groupBy } from 'lodash-es'
|
|
|
import PreviewItem, { PreviewType } from './preview-item'
|
|
|
import LanguageSelect from './language-select'
|
|
|
import s from './index.module.css'
|
|
|
-import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
|
|
|
+import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
|
|
|
import {
|
|
|
createDocument,
|
|
|
createFirstDocument,
|
|
@@ -33,13 +32,14 @@ import { useDatasetDetailContext } from '@/context/dataset-detail'
|
|
|
import I18n from '@/context/i18n'
|
|
|
import { IS_CE_EDITION } from '@/config'
|
|
|
|
|
|
+type ValueOf<T> = T[keyof T]
|
|
|
type StepTwoProps = {
|
|
|
isSetting?: boolean
|
|
|
documentDetail?: FullDocumentDetail
|
|
|
hasSetAPIKEY: boolean
|
|
|
onSetting: () => void
|
|
|
datasetId?: string
|
|
|
- indexingType?: string
|
|
|
+ indexingType?: ValueOf<IndexingType>
|
|
|
dataSourceType: DataSourceType
|
|
|
files: CustomFile[]
|
|
|
notionPages?: NotionPage[]
|
|
@@ -89,21 +89,23 @@ const StepTwo = ({
|
|
|
const [rules, setRules] = useState<PreProcessingRule[]>([])
|
|
|
const [defaultConfig, setDefaultConfig] = useState<Rules>()
|
|
|
const hasSetIndexType = !!indexingType
|
|
|
- const [indexType, setIndexType] = useState<IndexingType>(
|
|
|
- indexingType
|
|
|
- || hasSetAPIKEY
|
|
|
+ const [indexType, setIndexType] = useState<ValueOf<IndexingType>>(
|
|
|
+ (indexingType
|
|
|
+ || hasSetAPIKEY)
|
|
|
? IndexingType.QUALIFIED
|
|
|
: IndexingType.ECONOMICAL,
|
|
|
)
|
|
|
const [docForm, setDocForm] = useState<DocForm | string>(
|
|
|
- datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
|
|
|
+ (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT,
|
|
|
)
|
|
|
const [docLanguage, setDocLanguage] = useState<string>(locale === 'en' ? 'English' : 'Chinese')
|
|
|
const [QATipHide, setQATipHide] = useState(false)
|
|
|
const [previewSwitched, setPreviewSwitched] = useState(false)
|
|
|
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
|
|
|
- const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
|
|
|
- const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
|
|
|
+ const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
|
|
|
+ const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
|
|
|
+ const [estimateTokes, setEstimateTokes] = useState<Pick<IndexingEstimateResponse, 'tokens' | 'total_price'> | null>(null)
|
|
|
+
|
|
|
const fileIndexingEstimate = (() => {
|
|
|
return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate
|
|
|
})()
|
|
@@ -153,7 +155,7 @@ const StepTwo = ({
|
|
|
}
|
|
|
const resetRules = () => {
|
|
|
if (defaultConfig) {
|
|
|
- setSegmentIdentifier(defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator || '\\n')
|
|
|
+ setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n')
|
|
|
setMax(defaultConfig.segmentation.max_tokens)
|
|
|
setRules(defaultConfig.pre_processing_rules)
|
|
|
}
|
|
@@ -161,12 +163,14 @@ const StepTwo = ({
|
|
|
|
|
|
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
|
|
|
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
|
|
- const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm))
|
|
|
- if (segmentationType === SegmentType.CUSTOM)
|
|
|
+ const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!)
|
|
|
+ if (segmentationType === SegmentType.CUSTOM) {
|
|
|
setCustomFileIndexingEstimate(res)
|
|
|
-
|
|
|
- else
|
|
|
+ }
|
|
|
+ else {
|
|
|
setAutomaticFileIndexingEstimate(res)
|
|
|
+ indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price })
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
const confirmChangeCustomConfig = () => {
|
|
@@ -179,8 +183,8 @@ const StepTwo = ({
|
|
|
const getIndexing_technique = () => indexingType || indexType
|
|
|
|
|
|
const getProcessRule = () => {
|
|
|
- const processRule: any = {
|
|
|
- rules: {}, // api will check this. It will be removed after api refactored.
|
|
|
+ const processRule: ProcessRule = {
|
|
|
+ rules: {} as any, // api will check this. It will be removed after api refactored.
|
|
|
mode: segmentationType,
|
|
|
}
|
|
|
if (segmentationType === SegmentType.CUSTOM) {
|
|
@@ -220,37 +224,35 @@ const StepTwo = ({
|
|
|
}) as NotionInfo[]
|
|
|
}
|
|
|
|
|
|
- const getFileIndexingEstimateParams = (docForm: DocForm) => {
|
|
|
- let params
|
|
|
+ const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => {
|
|
|
if (dataSourceType === DataSourceType.FILE) {
|
|
|
- params = {
|
|
|
+ return {
|
|
|
info_list: {
|
|
|
data_source_type: dataSourceType,
|
|
|
file_info_list: {
|
|
|
- file_ids: files.map(file => file.id),
|
|
|
+ file_ids: files.map(file => file.id) as string[],
|
|
|
},
|
|
|
},
|
|
|
- indexing_technique: getIndexing_technique(),
|
|
|
+ indexing_technique: getIndexing_technique() as string,
|
|
|
process_rule: getProcessRule(),
|
|
|
doc_form: docForm,
|
|
|
doc_language: docLanguage,
|
|
|
- dataset_id: datasetId,
|
|
|
+ dataset_id: datasetId as string,
|
|
|
}
|
|
|
}
|
|
|
if (dataSourceType === DataSourceType.NOTION) {
|
|
|
- params = {
|
|
|
+ return {
|
|
|
info_list: {
|
|
|
data_source_type: dataSourceType,
|
|
|
notion_info_list: getNotionInfo(),
|
|
|
},
|
|
|
- indexing_technique: getIndexing_technique(),
|
|
|
+ indexing_technique: getIndexing_technique() as string,
|
|
|
process_rule: getProcessRule(),
|
|
|
doc_form: docForm,
|
|
|
doc_language: docLanguage,
|
|
|
- dataset_id: datasetId,
|
|
|
+ dataset_id: datasetId as string,
|
|
|
}
|
|
|
}
|
|
|
- return params
|
|
|
}
|
|
|
|
|
|
const getCreationParams = () => {
|
|
@@ -291,7 +293,7 @@ const StepTwo = ({
|
|
|
try {
|
|
|
const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
|
|
|
const separator = res.rules.segmentation.separator
|
|
|
- setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
|
|
|
+ setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n')
|
|
|
setMax(res.rules.segmentation.max_tokens)
|
|
|
setRules(res.rules.pre_processing_rules)
|
|
|
setDefaultConfig(res.rules)
|
|
@@ -306,7 +308,7 @@ const StepTwo = ({
|
|
|
const rules = documentDetail.dataset_process_rule.rules
|
|
|
const separator = rules.segmentation.separator
|
|
|
const max = rules.segmentation.max_tokens
|
|
|
- setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
|
|
|
+ setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n')
|
|
|
setMax(max)
|
|
|
setRules(rules.pre_processing_rules)
|
|
|
setDefaultConfig(rules)
|
|
@@ -330,7 +332,7 @@ const StepTwo = ({
|
|
|
res = await createFirstDocument({
|
|
|
body: params,
|
|
|
})
|
|
|
- updateIndexingTypeCache && updateIndexingTypeCache(indexType)
|
|
|
+ updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
|
|
|
updateResultCache && updateResultCache(res)
|
|
|
}
|
|
|
else {
|
|
@@ -338,7 +340,7 @@ const StepTwo = ({
|
|
|
datasetId,
|
|
|
body: params,
|
|
|
})
|
|
|
- updateIndexingTypeCache && updateIndexingTypeCache(indexType)
|
|
|
+ updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
|
|
|
updateResultCache && updateResultCache(res)
|
|
|
}
|
|
|
if (mutateDatasetRes)
|
|
@@ -549,9 +551,9 @@ const StepTwo = ({
|
|
|
<div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div>
|
|
|
<div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
|
|
|
{
|
|
|
- fileIndexingEstimate
|
|
|
+ estimateTokes
|
|
|
? (
|
|
|
- <div className='text-xs font-medium text-gray-800'>{formatNumber(fileIndexingEstimate.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(fileIndexingEstimate.total_price)}</span>)</div>
|
|
|
+ <div className='text-xs font-medium text-gray-800'>{formatNumber(estimateTokes.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(estimateTokes.total_price)}</span>)</div>
|
|
|
)
|
|
|
: (
|
|
|
<div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>
|