Rhon Joe 1 рік тому
батько
коміт
52bec63275

+ 1 - 1
web/app/components/datasets/create/index.tsx

@@ -127,7 +127,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
         {(step === 2 && (!datasetId || (datasetId && !!detail))) && <StepTwo
           hasSetAPIKEY={!!embeddingsDefaultModel}
           onSetting={showSetAPIKey}
-          indexingType={detail?.indexing_technique || ''}
+          indexingType={detail?.indexing_technique}
           datasetId={datasetId}
           dataSourceType={dataSourceType}
           files={fileList.map(file => file.file)}

+ 34 - 32
web/app/components/datasets/create/step-two/index.tsx

@@ -1,4 +1,3 @@
-/* eslint-disable no-mixed-operators */
 'use client'
 import React, { useEffect, useLayoutEffect, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
@@ -11,7 +10,7 @@ import { groupBy } from 'lodash-es'
 import PreviewItem, { PreviewType } from './preview-item'
 import LanguageSelect from './language-select'
 import s from './index.module.css'
-import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
+import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
 import {
   createDocument,
   createFirstDocument,
@@ -33,13 +32,14 @@ import { useDatasetDetailContext } from '@/context/dataset-detail'
 import I18n from '@/context/i18n'
 import { IS_CE_EDITION } from '@/config'
 
+type ValueOf<T> = T[keyof T]
 type StepTwoProps = {
   isSetting?: boolean
   documentDetail?: FullDocumentDetail
   hasSetAPIKEY: boolean
   onSetting: () => void
   datasetId?: string
-  indexingType?: string
+  indexingType?: ValueOf<IndexingType>
   dataSourceType: DataSourceType
   files: CustomFile[]
   notionPages?: NotionPage[]
@@ -89,21 +89,23 @@ const StepTwo = ({
   const [rules, setRules] = useState<PreProcessingRule[]>([])
   const [defaultConfig, setDefaultConfig] = useState<Rules>()
   const hasSetIndexType = !!indexingType
-  const [indexType, setIndexType] = useState<IndexingType>(
-    indexingType
-      || hasSetAPIKEY
+  const [indexType, setIndexType] = useState<ValueOf<IndexingType>>(
+    (indexingType
+      || hasSetAPIKEY)
       ? IndexingType.QUALIFIED
       : IndexingType.ECONOMICAL,
   )
   const [docForm, setDocForm] = useState<DocForm | string>(
-    datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
+    (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT,
   )
   const [docLanguage, setDocLanguage] = useState<string>(locale === 'en' ? 'English' : 'Chinese')
   const [QATipHide, setQATipHide] = useState(false)
   const [previewSwitched, setPreviewSwitched] = useState(false)
   const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
-  const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
-  const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
+  const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
+  const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
+  const [estimateTokes, setEstimateTokes] = useState<Pick<IndexingEstimateResponse, 'tokens' | 'total_price'> | null>(null)
+
   const fileIndexingEstimate = (() => {
     return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate
   })()
@@ -153,7 +155,7 @@ const StepTwo = ({
   }
   const resetRules = () => {
     if (defaultConfig) {
-      setSegmentIdentifier(defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator || '\\n')
+      setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n')
       setMax(defaultConfig.segmentation.max_tokens)
       setRules(defaultConfig.pre_processing_rules)
     }
@@ -161,12 +163,14 @@ const StepTwo = ({
 
   const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
     // eslint-disable-next-line @typescript-eslint/no-use-before-define
-    const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm))
-    if (segmentationType === SegmentType.CUSTOM)
+    const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!)
+    if (segmentationType === SegmentType.CUSTOM) {
       setCustomFileIndexingEstimate(res)
-
-    else
+    }
+    else {
       setAutomaticFileIndexingEstimate(res)
+      indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price })
+    }
   }
 
   const confirmChangeCustomConfig = () => {
@@ -179,8 +183,8 @@ const StepTwo = ({
   const getIndexing_technique = () => indexingType || indexType
 
   const getProcessRule = () => {
-    const processRule: any = {
-      rules: {}, // api will check this. It will be removed after api refactored.
+    const processRule: ProcessRule = {
+      rules: {} as any, // api will check this. It will be removed after api refactored.
       mode: segmentationType,
     }
     if (segmentationType === SegmentType.CUSTOM) {
@@ -220,37 +224,35 @@ const StepTwo = ({
     }) as NotionInfo[]
   }
 
-  const getFileIndexingEstimateParams = (docForm: DocForm) => {
-    let params
+  const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => {
     if (dataSourceType === DataSourceType.FILE) {
-      params = {
+      return {
         info_list: {
           data_source_type: dataSourceType,
           file_info_list: {
-            file_ids: files.map(file => file.id),
+            file_ids: files.map(file => file.id) as string[],
           },
         },
-        indexing_technique: getIndexing_technique(),
+        indexing_technique: getIndexing_technique() as string,
         process_rule: getProcessRule(),
         doc_form: docForm,
         doc_language: docLanguage,
-        dataset_id: datasetId,
+        dataset_id: datasetId as string,
       }
     }
     if (dataSourceType === DataSourceType.NOTION) {
-      params = {
+      return {
         info_list: {
           data_source_type: dataSourceType,
           notion_info_list: getNotionInfo(),
         },
-        indexing_technique: getIndexing_technique(),
+        indexing_technique: getIndexing_technique() as string,
         process_rule: getProcessRule(),
         doc_form: docForm,
         doc_language: docLanguage,
-        dataset_id: datasetId,
+        dataset_id: datasetId as string,
       }
     }
-    return params
   }
 
   const getCreationParams = () => {
@@ -291,7 +293,7 @@ const StepTwo = ({
     try {
       const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
       const separator = res.rules.segmentation.separator
-      setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
+      setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n')
       setMax(res.rules.segmentation.max_tokens)
       setRules(res.rules.pre_processing_rules)
       setDefaultConfig(res.rules)
@@ -306,7 +308,7 @@ const StepTwo = ({
       const rules = documentDetail.dataset_process_rule.rules
       const separator = rules.segmentation.separator
       const max = rules.segmentation.max_tokens
-      setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
+      setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n')
       setMax(max)
       setRules(rules.pre_processing_rules)
       setDefaultConfig(rules)
@@ -330,7 +332,7 @@ const StepTwo = ({
         res = await createFirstDocument({
           body: params,
         })
-        updateIndexingTypeCache && updateIndexingTypeCache(indexType)
+        updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
         updateResultCache && updateResultCache(res)
       }
       else {
@@ -338,7 +340,7 @@ const StepTwo = ({
           datasetId,
           body: params,
         })
-        updateIndexingTypeCache && updateIndexingTypeCache(indexType)
+        updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
         updateResultCache && updateResultCache(res)
       }
       if (mutateDatasetRes)
@@ -549,9 +551,9 @@ const StepTwo = ({
                     <div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div>
                     <div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
                     {
-                      fileIndexingEstimate
+                      estimateTokes
                         ? (
-                          <div className='text-xs font-medium text-gray-800'>{formatNumber(fileIndexingEstimate.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(fileIndexingEstimate.total_price)}</span>)</div>
+                          <div className='text-xs font-medium text-gray-800'>{formatNumber(estimateTokes.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(estimateTokes.total_price)}</span>)</div>
                         )
                         : (
                           <div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>

+ 9 - 2
web/models/datasets.ts

@@ -183,15 +183,22 @@ export type DocumentListResponse = {
   limit: number
 }
 
-export type CreateDocumentReq = {
+export type DocumentReq = {
   original_document_id?: string
   indexing_technique?: string
   doc_form: 'text_model' | 'qa_model'
   doc_language: string
-  data_source: DataSource
   process_rule: ProcessRule
 }
 
+export type CreateDocumentReq = DocumentReq & {
+  data_source: DataSource
+}
+
+export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
+  dataset_id: string
+}
+
 export type DataSource = {
   type: DataSourceType
   info_list: {

+ 2 - 1
web/service/datasets.ts

@@ -10,6 +10,7 @@ import type {
   FileIndexingEstimateResponse,
   HitTestingRecordsResponse,
   HitTestingResponse,
+  IndexingEstimateParams,
   IndexingEstimateResponse,
   IndexingStatusBatchResponse,
   IndexingStatusResponse,
@@ -189,7 +190,7 @@ export const fetchTestingRecords: Fetcher<HitTestingRecordsResponse, { datasetId
   return get<HitTestingRecordsResponse>(`/datasets/${datasetId}/queries`, { params })
 }
 
-export const fetchFileIndexingEstimate: Fetcher<FileIndexingEstimateResponse, any> = (body: any) => {
+export const fetchFileIndexingEstimate: Fetcher<FileIndexingEstimateResponse, IndexingEstimateParams> = (body: IndexingEstimateParams) => {
   return post<FileIndexingEstimateResponse>('/datasets/indexing-estimate', { body })
 }