/*
 * Embedding detail view for a dataset document.
 *
 * NOTE(review): in this copy of the file the JSX element tags and the generic
 * type arguments (on `FC`, `useState`, `as Promise`) appear to be missing, and
 * the whole module is collapsed onto a few physical lines. The notes below
 * describe only the logic that is still visible; confirm against version
 * control before changing behaviour.
 *
 * RuleDetail (wrapped in React.memo, displayName 'RuleDetail')
 *   Props: sourceData (process rule response), indexingType, retrievalMethod.
 *   - segmentationRuleMap: translated labels for the three displayed fields
 *     `mode`, `segmentLength` and `textCleaning`; its keys drive the
 *     `Object.keys(...).map(...)` in the render output.
 *   - getRuleName(key): maps the rule ids 'remove_extra_spaces',
 *     'remove_urls_emails' and 'remove_stopwords' to translated names. Any
 *     other id falls through without an explicit return.
 *   - getValue(field): returns the value to display for a field, with '-' as
 *     the fallback whenever `sourceData.mode` is falsy.
 *       'mode'          -> the "custom" label when mode is ProcessMode.general,
 *                          otherwise the "hierarchical" label followed by
 *                          ' · ' and the parent-mode label ('paragraph' when
 *                          rules.parent_mode === 'paragraph', else fullDoc).
 *       'segmentLength' -> rules.segmentation.max_tokens for general mode,
 *                          otherwise a "parent ...; child ..." string that also
 *                          uses rules.subchunk_segmentation.max_tokens. Either
 *                          number falls back to '-' when it is not a number.
 *       anything else   -> names of the enabled pre_processing_rules joined
 *                          with ','.
 *     It is memoised with useCallback on [sourceData] only; the
 *     exhaustive-deps lint rule is suppressed, so `t` is not a dependency.
 *
 * EmbeddingDetail (default export, wrapped in React.memo)
 *   Props: datasetId, documentId, indexingType, retrievalMethod, detailUpdate.
 *   - The datasetId / documentId props take precedence (via `??`) over the
 *     values read from useDocumentContext.
 *   - fetchIndexingStatus: calls doFetchIndexingStatus with the resolved ids,
 *     stores the result in `indexingStatusDetail` and returns it.
 *   - Polling: `isStopQuery` is a ref used as a stop flag.
 *       startQueryStatus returns immediately when the flag is set. Otherwise it
 *       fetches the status; if indexing_status is 'completed', 'error' or
 *       'paused' it sets the flag, calls detailUpdate() and stops. In every
 *       other case, including a thrown error, it sleeps 2500 ms and calls
 *       itself again.
 *       The effect clears the flag and starts polling; its cleanup sets the
 *       flag.
 *     NOTE(review): startQueryStatus is memoised on [stopQueryStatus] only
 *     (lint suppressed), so it closes over the fetchIndexingStatus and
 *     detailUpdate of the render that created it - confirm the ids cannot
 *     change during the component's lifetime.
 *     NOTE(review): the stop flag is only checked on entry, so a fetch that is
 *     already in flight when the cleanup runs will still call
 *     setIndexingStatusDetail and possibly detailUpdate.
 *   - ruleDetail: loaded through useSWR with the key
 *     { action: 'fetchProcessRule', params: { documentId } }; `action` is
 *     removed with lodash `omit` before calling fetchProcessRule, and
 *     revalidateOnFocus is disabled.
 *   - isEmbedding is true for the statuses 'indexing', 'splitting', 'parsing'
 *     and 'cleaning'; isEmbeddingCompleted / isEmbeddingPaused /
 *     isEmbeddingError match 'completed' / 'paused' / 'error'.
 *   - percent: round(completed_segments * 100 / total_segments), 0 when the
 *     total is 0 or missing, capped at 100.
 *   - handleSwitch: calls pauseDocIndexing while embedding, otherwise
 *     resumeDocIndexing, through asyncRunSafe. On success it shows a success
 *     toast; if the document was paused it clears the stop flag, restarts
 *     polling and calls detailUpdate(); it then resets indexingStatusDetail to
 *     null. On failure it shows an error toast.
 *   - The segments label uses `|| '--'`, so a count of 0 is rendered as '--',
 *     the same as a missing count.
 */
import type { FC } from 'react' import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import useSWR from 'swr' import { useContext } from 'use-context-selector' import { useTranslation } from 'react-i18next' import { omit } from 'lodash-es' import { RiLoader2Line, RiPauseCircleLine, RiPlayCircleLine } from '@remixicon/react' import Image from 'next/image' import { FieldInfo } from '../metadata' import { useDocumentContext } from '../index' import { IndexingType } from '../../../create/step-two' import { indexMethodIcon, retrievalIcon } from '../../../create/icons' import EmbeddingSkeleton from './skeleton' import { RETRIEVE_METHOD } from '@/types/app' import cn from '@/utils/classnames' import Divider from '@/app/components/base/divider' import { ToastContext } from '@/app/components/base/toast' import { ProcessMode, type ProcessRuleResponse } from '@/models/datasets' import type { CommonResponse } from '@/models/common' import { asyncRunSafe, sleep } from '@/utils' import { fetchIndexingStatus as doFetchIndexingStatus, fetchProcessRule, pauseDocIndexing, resumeDocIndexing, } from '@/service/datasets' type IEmbeddingDetailProps = { datasetId?: string documentId?: string indexingType?: IndexingType retrievalMethod?: RETRIEVE_METHOD detailUpdate: VoidFunction } type IRuleDetailProps = { sourceData?: ProcessRuleResponse indexingType?: IndexingType retrievalMethod?: RETRIEVE_METHOD } const RuleDetail: FC = React.memo(({ sourceData, indexingType, retrievalMethod, }) => { const { t } = useTranslation() const segmentationRuleMap = { mode: t('datasetDocuments.embedding.mode'), segmentLength: t('datasetDocuments.embedding.segmentLength'), textCleaning: t('datasetDocuments.embedding.textCleaning'), } const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') return t('datasetCreation.stepTwo.removeExtraSpaces') if (key === 'remove_urls_emails') return t('datasetCreation.stepTwo.removeUrlEmails') if (key === 'remove_stopwords') return 
t('datasetCreation.stepTwo.removeStopwords') } const isNumber = (value: unknown) => { return typeof value === 'number' } const getValue = useCallback((field: string) => { let value: string | number | undefined = '-' const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens) ? sourceData.rules.segmentation.max_tokens : value const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens) ? sourceData.rules.subchunk_segmentation.max_tokens : value switch (field) { case 'mode': value = !sourceData?.mode ? value : sourceData.mode === ProcessMode.general ? (t('datasetDocuments.embedding.custom') as string) : `${t('datasetDocuments.embedding.hierarchical')} · ${sourceData?.rules?.parent_mode === 'paragraph' ? t('dataset.parentMode.paragraph') : t('dataset.parentMode.fullDoc')}` break case 'segmentLength': value = !sourceData?.mode ? value : sourceData.mode === ProcessMode.general ? maxTokens : `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}` break default: value = !sourceData?.mode ? value : sourceData?.rules?.pre_processing_rules?.filter(rule => rule.enabled).map(rule => getRuleName(rule.id)).join(',') break } return value // eslint-disable-next-line react-hooks/exhaustive-deps }, [sourceData]) return
{Object.keys(segmentationRuleMap).map((field) => { return })}
} /> } />
}) RuleDetail.displayName = 'RuleDetail' const EmbeddingDetail: FC = ({ datasetId: dstId, documentId: docId, detailUpdate, indexingType, retrievalMethod, }) => { const { t } = useTranslation() const { notify } = useContext(ToastContext) const datasetId = useDocumentContext(s => s.datasetId) const documentId = useDocumentContext(s => s.documentId) const localDatasetId = dstId ?? datasetId const localDocumentId = docId ?? documentId const [indexingStatusDetail, setIndexingStatusDetail] = useState(null) const fetchIndexingStatus = async () => { const status = await doFetchIndexingStatus({ datasetId: localDatasetId, documentId: localDocumentId }) setIndexingStatusDetail(status) return status } const isStopQuery = useRef(false) const stopQueryStatus = useCallback(() => { isStopQuery.current = true }, []) const startQueryStatus = useCallback(async () => { if (isStopQuery.current) return try { const indexingStatusDetail = await fetchIndexingStatus() if (['completed', 'error', 'paused'].includes(indexingStatusDetail?.indexing_status)) { stopQueryStatus() detailUpdate() return } await sleep(2500) await startQueryStatus() } catch (e) { await sleep(2500) await startQueryStatus() } // eslint-disable-next-line react-hooks/exhaustive-deps }, [stopQueryStatus]) useEffect(() => { isStopQuery.current = false startQueryStatus() return () => { stopQueryStatus() } }, [startQueryStatus, stopQueryStatus]) const { data: ruleDetail } = useSWR({ action: 'fetchProcessRule', params: { documentId: localDocumentId }, }, apiParams => fetchProcessRule(omit(apiParams, 'action')), { revalidateOnFocus: false, }) const isEmbedding = useMemo(() => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const isEmbeddingCompleted = useMemo(() => ['completed'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const isEmbeddingPaused = useMemo(() => 
['paused'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const isEmbeddingError = useMemo(() => ['error'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const percent = useMemo(() => { const completedCount = indexingStatusDetail?.completed_segments || 0 const totalCount = indexingStatusDetail?.total_segments || 0 if (totalCount === 0) return 0 const percent = Math.round(completedCount * 100 / totalCount) return percent > 100 ? 100 : percent }, [indexingStatusDetail]) const handleSwitch = async () => { const opApi = isEmbedding ? pauseDocIndexing : resumeDocIndexing const [e] = await asyncRunSafe(opApi({ datasetId: localDatasetId, documentId: localDocumentId }) as Promise) if (!e) { notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') }) // if the embedding is resumed from paused, we need to start the query status if (isEmbeddingPaused) { isStopQuery.current = false startQueryStatus() detailUpdate() } setIndexingStatusDetail(null) } else { notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) } } return ( <>
{isEmbedding && } {isEmbedding && t('datasetDocuments.embedding.processing')} {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')} {isEmbeddingPaused && t('datasetDocuments.embedding.paused')} {isEmbeddingError && t('datasetDocuments.embedding.error')} {isEmbedding && ( )} {isEmbeddingPaused && ( )}
{/* progress bar */}
{`${t('datasetDocuments.embedding.segments')} ${indexingStatusDetail?.completed_segments || '--'}/${indexingStatusDetail?.total_segments || '--'} · ${percent}%`}
) } export default React.memo(EmbeddingDetail)