import type { FC } from 'react' import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import useSWR from 'swr' import { useRouter } from 'next/navigation' import { useTranslation } from 'react-i18next' import { omit } from 'lodash-es' import { ArrowRightIcon } from '@heroicons/react/24/solid' import { RiCheckboxCircleFill, RiErrorWarningFill, RiLoader2Fill, RiTerminalBoxLine, } from '@remixicon/react' import Image from 'next/image' import { indexMethodIcon, retrievalIcon } from '../icons' import { IndexingType } from '../step-two' import DocumentFileIcon from '../../common/document-file-icon' import cn from '@/utils/classnames' import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' import Button from '@/app/components/base/button' import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets' import { DataSourceType, ProcessMode } from '@/models/datasets' import NotionIcon from '@/app/components/base/notion-icon' import PriorityLabel from '@/app/components/billing/priority-label' import { Plan } from '@/app/components/billing/type' import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general' import UpgradeBtn from '@/app/components/billing/upgrade-btn' import { useProviderContext } from '@/context/provider-context' import { sleep } from '@/utils' import { RETRIEVE_METHOD } from '@/types/app' import Tooltip from '@/app/components/base/tooltip' import { useInvalidDocumentList } from '@/service/knowledge/use-document' type Props = { datasetId: string batchId: string documents?: FullDocumentDetail[] indexingType?: string retrievalMethod?: string } const RuleDetail: FC<{ sourceData?: ProcessRuleResponse indexingType?: string retrievalMethod?: string }> = ({ sourceData, indexingType, retrievalMethod }) => { const { t } = useTranslation() const 
segmentationRuleMap = {
    // Translated labels for the rule rows; the keys are iterated by the render code
    // (presumably each key is also passed to `getValue` — confirm against the JSX).
    mode: t('datasetDocuments.embedding.mode'),
    segmentLength: t('datasetDocuments.embedding.segmentLength'),
    textCleaning: t('datasetDocuments.embedding.textCleaning'),
  }

  /**
   * Translates a pre-processing rule id into its display label.
   * Ids other than the three handled here yield `undefined`.
   */
  const getRuleName = (key: string) => {
    if (key === 'remove_extra_spaces')
      return t('datasetCreation.stepTwo.removeExtraSpaces')
    if (key === 'remove_urls_emails')
      return t('datasetCreation.stepTwo.removeUrlEmails')
    if (key === 'remove_stopwords')
      return t('datasetCreation.stepTwo.removeStopwords')
  }

  /** Type guard so that a checked value can be used as a `number` without a cast. */
  const isNumber = (value: unknown): value is number => typeof value === 'number'

  /**
   * Resolves the text shown for one rule row.
   *
   * @param field - `'mode'`, `'segmentLength'`, or anything else (treated as the
   *   text-cleaning row).
   * @returns `'-'` while no process mode is available; otherwise the mode label,
   *   the max-token summary, or the comma-joined names of the enabled
   *   pre-processing rules (`undefined` when the rule list is absent).
   */
  const getValue = useCallback((field: string): string | number | undefined => {
    const placeholder = '-'

    // Every row shows the placeholder until the process rule has a mode.
    if (!sourceData?.mode)
      return placeholder

    const parentMax = sourceData.rules?.segmentation?.max_tokens
    const childMax = sourceData.rules?.subchunk_segmentation?.max_tokens
    const maxTokens = isNumber(parentMax) ? parentMax : placeholder
    const childMaxTokens = isNumber(childMax) ? childMax : placeholder
    const isGeneralMode = sourceData.mode === ProcessMode.general

    switch (field) {
      case 'mode':
        if (isGeneralMode)
          return t('datasetDocuments.embedding.custom') as string
        // The separator is a middle dot (U+00B7); it was previously stored mis-encoded.
        return `${t('datasetDocuments.embedding.hierarchical')} · ${sourceData.rules?.parent_mode === 'paragraph' ? t('dataset.parentMode.paragraph') : t('dataset.parentMode.fullDoc')}`
      case 'segmentLength':
        if (isGeneralMode)
          return maxTokens
        return `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}`
      default:
        return sourceData.rules?.pre_processing_rules
          ?.filter(rule => rule.enabled)
          .map(rule => getRuleName(rule.id))
          .join(',')
    }
    // `t` is a dependency so the labels follow language changes. `getRuleName` is
    // re-created on every render and only depends on `t`, hence the lint suppression.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sourceData, t])

  return
{Object.keys(segmentationRuleMap).map((field) => { return })} } /> } />
}

// Statuses that mean a document is actively being processed.
const EMBEDDING_IN_PROGRESS_STATUSES = ['indexing', 'splitting', 'parsing', 'cleaning']
// Statuses after which polling no longer needs to continue for a document.
const EMBEDDING_TERMINAL_STATUSES = ['completed', 'error', 'paused']
// Delay between two indexing-status requests, in milliseconds.
const INDEXING_STATUS_POLL_INTERVAL = 2500

/**
 * Displays the embedding progress of every document in an indexing batch.
 *
 * On mount it polls the batch indexing status every 2.5 seconds until all
 * documents are `completed`, `error` or `paused`, and it loads the process rule
 * of the first document in `documents`.
 */
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
  const { t } = useTranslation()
  const { enableBilling, plan } = useProviderContext()

  // May be undefined: `documents` defaults to an empty array.
  const getFirstDocument = documents[0]

  const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])

  /** Fetches the batch status, stores it in state and returns it to the caller. */
  const fetchIndexingStatus = async () => {
    const status = await doFetchIndexingStatus({ datasetId, batchId })
    setIndexingStatusDetail(status.data)
    return status.data
  }

  const [isStopQuery, setIsStopQuery] = useState(false)
  const isStopQueryRef = useRef(isStopQuery)
  useEffect(() => {
    isStopQueryRef.current = isStopQuery
  }, [isStopQuery])

  const stopQueryStatus = () => {
    // Set the ref synchronously: when this runs from the unmount cleanup, the
    // state update never triggers the syncing effect above, so relying on it
    // alone would leave the polling loop running after unmount.
    isStopQueryRef.current = true
    setIsStopQuery(true)
  }

  /** Polls the indexing status until it is stopped or every document is finished. */
  const startQueryStatus = async () => {
    // A loop instead of self-recursion, so a long-running poll does not build an
    // ever-growing chain of pending promises.
    while (!isStopQueryRef.current) {
      try {
        const latestDetails = await fetchIndexingStatus()
        const isCompleted = latestDetails.every(detail => EMBEDDING_TERMINAL_STATUSES.includes(detail.indexing_status))
        if (isCompleted) {
          stopQueryStatus()
          return
        }
      }
      catch {
        // Best effort: a failed request is retried after the usual delay.
      }
      await sleep(INDEXING_STATUS_POLL_INTERVAL)
    }
  }

  useEffect(() => {
    isStopQueryRef.current = false
    setIsStopQuery(false)
    startQueryStatus()
    return () => {
      stopQueryStatus()
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  // get rule
  // A `null` key makes SWR skip the request when there is no document, instead
  // of throwing while reading `.id` of undefined.
  const { data: ruleDetail } = useSWR(
    getFirstDocument
      ? {
        action: 'fetchProcessRule',
        params: { documentId: getFirstDocument.id },
      }
      : null,
    apiParams => fetchProcessRule(omit(apiParams, 'action')),
    {
      revalidateOnFocus: false,
    },
  )

  const router = useRouter()
  const invalidDocumentList = useInvalidDocumentList()

  /** Invalidates the document list and navigates to this dataset's documents page. */
  const navToDocumentList = () => {
    invalidDocumentList()
    router.push(`/datasets/${datasetId}/documents`)
  }

  const navToApiDocs = () => {
    router.push('/datasets?category=api')
  }

  // True while at least one document is indexing, splitting, parsing or cleaning.
  const isEmbedding = useMemo(() => {
    return indexingStatusBatchDetail.some(detail => EMBEDDING_IN_PROGRESS_STATUSES.includes(detail?.indexing_status || ''))
  }, [indexingStatusBatchDetail])

  // True when every document is completed, errored or paused.
  const isEmbeddingCompleted = useMemo(() => {
    return indexingStatusBatchDetail.every(detail => EMBEDDING_TERMINAL_STATUSES.includes(detail?.indexing_status || ''))
  }, [indexingStatusBatchDetail])

  /** Looks up the document passed via props whose id matches a status entry. */
  const findDocument = (id: string) => documents.find(document => document.id === id)

  const getSourceName = (id: string) => findDocument(id)?.name

  /** Returns the text after the last dot of the name, or `'txt'` when there is none. */
  const getFileType = (name?: string) => name?.split('.').pop() || 'txt'

  /** Completed segments as a rounded percentage, capped at 100; 0 when the total is 0. */
  const getSourcePercent = (detail: IndexingStatusResponse) => {
    const completedCount = detail.completed_segments || 0
    const totalCount = detail.total_segments || 0
    if (totalCount === 0)
      return 0
    return Math.min(100, Math.round(completedCount * 100 / totalCount))
  }

  const getSourceType = (id: string) => findDocument(id)?.data_source_type as DataSourceType

  const getIcon = (id: string) => findDocument(id)?.data_source_info.notion_page_icon

  // Unlike `isEmbedding`, a document that is still `waiting` also counts here.
  const isSourceEmbedding = (detail: IndexingStatusResponse) =>
    [...EMBEDDING_IN_PROGRESS_STATUSES, 'waiting'].includes(detail.indexing_status || '')

  return (
    <>
{isEmbedding &&
{t('datasetDocuments.embedding.processing')}
} {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
{ enableBilling && plan.type !== Plan.team && (
{t('billing.plansCommon.documentProcessingPriorityUpgrade')}
) }
{indexingStatusBatchDetail.map(indexingStatusDetail => (
{isSourceEmbedding(indexingStatusDetail) && (
)}
{getSourceType(indexingStatusDetail.id) === DataSourceType.FILE && ( //
)} {getSourceType(indexingStatusDetail.id) === DataSourceType.NOTION && ( )}
{getSourceName(indexingStatusDetail.id)}
{ enableBilling && ( ) }
{isSourceEmbedding(indexingStatusDetail) && (
{`${getSourcePercent(indexingStatusDetail)}%`}
)} {indexingStatusDetail.indexing_status === 'error' && ( )} {indexingStatusDetail.indexing_status === 'completed' && ( )}
))}

) } export default EmbeddingProcess