index.tsx 14 KB


  1. import type { FC } from 'react'
  2. import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
  3. import useSWR from 'swr'
  4. import { useRouter } from 'next/navigation'
  5. import { useTranslation } from 'react-i18next'
  6. import { omit } from 'lodash-es'
  7. import { ArrowRightIcon } from '@heroicons/react/24/solid'
  8. import {
  9. RiCheckboxCircleFill,
  10. RiErrorWarningFill,
  11. RiLoader2Fill,
  12. RiTerminalBoxLine,
  13. } from '@remixicon/react'
  14. import Image from 'next/image'
  15. import { indexMethodIcon, retrievalIcon } from '../icons'
  16. import { IndexingType } from '../step-two'
  17. import DocumentFileIcon from '../../common/document-file-icon'
  18. import cn from '@/utils/classnames'
  19. import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
  20. import Button from '@/app/components/base/button'
  21. import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets'
  22. import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets'
  23. import { DataSourceType, ProcessMode } from '@/models/datasets'
  24. import NotionIcon from '@/app/components/base/notion-icon'
  25. import PriorityLabel from '@/app/components/billing/priority-label'
  26. import { Plan } from '@/app/components/billing/type'
  27. import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general'
  28. import UpgradeBtn from '@/app/components/billing/upgrade-btn'
  29. import { useProviderContext } from '@/context/provider-context'
  30. import { sleep } from '@/utils'
  31. import { RETRIEVE_METHOD } from '@/types/app'
  32. import Tooltip from '@/app/components/base/tooltip'
  /** Props accepted by the `EmbeddingProcess` component. */
  type Props = {
    /** Dataset whose batch is being indexed; used for the status request and the document-list route. */
    datasetId: string
    /** Batch identifier sent together with `datasetId` when fetching indexing status. */
    batchId: string
    /** Documents of the batch; looked up by id to resolve each row's name, source type and icon. */
    documents?: FullDocumentDetail[]
    /** Indexing technique; compared against `IndexingType.ECONOMICAL` when rendering the rule summary. */
    indexingType?: string
    /** Retrieval method; compared against `RETRIEVE_METHOD` members to pick the label and icon. */
    retrievalMethod?: string
  }
  /** Shown whenever a rule value is not available. */
  const RULE_VALUE_PLACEHOLDER = '-'

  /** i18n keys of the display names for the pre-processing rules this view knows, keyed by rule id. */
  const PRE_PROCESSING_RULE_NAME_KEYS = new Map<string, string>([
    ['remove_extra_spaces', 'datasetCreation.stepTwo.removeExtraSpaces'],
    ['remove_urls_emails', 'datasetCreation.stepTwo.removeUrlEmails'],
    ['remove_stopwords', 'datasetCreation.stepTwo.removeStopwords'],
  ])

  /**
   * Read-only summary of the processing rule applied to a document batch:
   * segmentation mode, segment length, enabled text-cleaning rules, index mode
   * and retrieval setting.
   *
   * @param sourceData - Process rule as returned by `fetchProcessRule`; while it is
   *   missing (or has no `mode`) the three segmentation rows render `-`.
   * @param indexingType - Compared against `IndexingType.ECONOMICAL` to choose the
   *   index-mode label/icon and to force the `invertedIndex` retrieval label.
   * @param retrievalMethod - Compared against `RETRIEVE_METHOD` members to choose the
   *   retrieval label and icon.
   */
  const RuleDetail: FC<{
    sourceData?: ProcessRuleResponse
    indexingType?: string
    retrievalMethod?: string
  }> = ({ sourceData, indexingType, retrievalMethod }) => {
    const { t } = useTranslation()

    // Row labels; the keys double as the `field` argument of `getValue`.
    const segmentationRuleMap = {
      mode: t('datasetDocuments.embedding.mode'),
      segmentLength: t('datasetDocuments.embedding.segmentLength'),
      textCleaning: t('datasetDocuments.embedding.textCleaning'),
    }

    /**
     * Resolves the text shown for one segmentation row.
     * Always returns a string or number, so the caller's `String(...)` can never
     * render the literal text "undefined".
     */
    const getValue = useCallback((field: string): string | number => {
      // Without a mode there is nothing meaningful to show for any row.
      if (!sourceData?.mode)
        return RULE_VALUE_PLACEHOLDER

      const rules = sourceData.rules
      const isGeneralMode = sourceData.mode === ProcessMode.general
      const tokensOrPlaceholder = (tokens: unknown): string | number =>
        typeof tokens === 'number' ? tokens : RULE_VALUE_PLACEHOLDER

      switch (field) {
        case 'mode': {
          if (isGeneralMode)
            return t('datasetDocuments.embedding.custom') as string
          const parentModeLabel = rules?.parent_mode === 'paragraph'
            ? t('dataset.parentMode.paragraph')
            : t('dataset.parentMode.fullDoc')
          return `${t('datasetDocuments.embedding.hierarchical')} · ${parentModeLabel}`
        }
        case 'segmentLength': {
          const maxTokens = tokensOrPlaceholder(rules?.segmentation?.max_tokens)
          if (isGeneralMode)
            return maxTokens
          const childMaxTokens = tokensOrPlaceholder(rules?.subchunk_segmentation?.max_tokens)
          return `${t('datasetDocuments.embedding.parentMaxTokens')} ${maxTokens}; ${t('datasetDocuments.embedding.childMaxTokens')} ${childMaxTokens}`
        }
        default: {
          const preProcessingRules = rules?.pre_processing_rules
          // A missing rule list used to surface as the string "undefined".
          if (!preProcessingRules)
            return RULE_VALUE_PLACEHOLDER
          return preProcessingRules
            .filter(rule => rule.enabled)
            .map(rule => PRE_PROCESSING_RULE_NAME_KEYS.get(rule.id))
            // Skip rule ids without a known label instead of emitting empty list entries.
            .filter((nameKey): nameKey is string => nameKey !== undefined)
            .map(nameKey => t(nameKey))
            .join(',')
        }
      }
    }, [sourceData, t])

    const isEconomical = indexingType === IndexingType.ECONOMICAL
    const indexModeIcon = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
    const retrievalMethodIcon = retrievalMethod === RETRIEVE_METHOD.fullText
      ? retrievalIcon.fullText
      : retrievalMethod === RETRIEVE_METHOD.hybrid
        ? retrievalIcon.hybrid
        : retrievalIcon.vector

    return (
      <div className='flex flex-col gap-1'>
        {Object.entries(segmentationRuleMap).map(([field, label]) => (
          <FieldInfo
            key={field}
            label={label}
            displayedValue={String(getValue(field))}
          />
        ))}
        <FieldInfo
          label={t('datasetCreation.stepTwo.indexMode')}
          displayedValue={t(`datasetCreation.stepTwo.${isEconomical ? 'economical' : 'qualified'}`) as string}
          valueIcon={
            <Image
              className='size-4'
              src={indexModeIcon}
              alt=''
            />
          }
        />
        <FieldInfo
          label={t('datasetSettings.form.retrievalSetting.title')}
          // Economical indexing is always labelled as inverted index, whatever `retrievalMethod` says.
          displayedValue={t(`dataset.retrieval.${isEconomical ? 'invertedIndex' : retrievalMethod}.title`) as string}
          valueIcon={
            <Image
              className='size-4'
              src={retrievalMethodIcon}
              alt=''
            />
          }
        />
      </div>
    )
  }
  /** Delay between two indexing-status requests, in milliseconds. */
  const INDEXING_STATUS_POLL_INTERVAL_MS = 2500

  /** Statuses that make the header show the "processing" spinner. */
  const EMBEDDING_STATUSES: readonly string[] = ['indexing', 'splitting', 'parsing', 'cleaning']

  /** Statuses for which a row shows its progress bar and percentage (also covers queued documents). */
  const SOURCE_EMBEDDING_STATUSES: readonly string[] = [...EMBEDDING_STATUSES, 'waiting']

  /** Statuses after which a document is no longer polled. */
  const FINISHED_STATUSES: readonly string[] = ['completed', 'error', 'paused']

  /**
   * Shows the indexing progress of every document in a batch, polls the backend
   * until all documents reach a finished status, and renders the processing rule
   * of the first document underneath.
   *
   * @param datasetId - Dataset used for the status request and the "documents" route.
   * @param batchId - Batch whose indexing status is polled.
   * @param documents - Documents of the batch; defaults to an empty list.
   * @param indexingType - Forwarded to `RuleDetail`.
   * @param retrievalMethod - Forwarded to `RuleDetail`.
   */
  const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
    const { t } = useTranslation()
    const { enableBilling, plan } = useProviderContext()
    const router = useRouter()
    const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])

    // Identifies the polling loop that is currently allowed to run. The value is changed
    // synchronously (no state/effect round trip), so a loop notices it has been stopped
    // even after the component has unmounted.
    const pollSessionRef = useRef(0)

    const isFinished = (detail: IndexingStatusResponse) =>
      FINISHED_STATUSES.includes(detail?.indexing_status || '')

    /** Invalidates the running polling loop, if any. */
    const stopQueryStatus = () => {
      pollSessionRef.current += 1
    }

    /**
     * Polls the batch status every `INDEXING_STATUS_POLL_INTERVAL_MS` until every
     * document is finished or the loop is stopped. Failed requests are retried
     * after the same delay.
     */
    const startQueryStatus = async () => {
      pollSessionRef.current += 1
      const session = pollSessionRef.current
      while (pollSessionRef.current === session) {
        try {
          const { data } = await doFetchIndexingStatus({ datasetId, batchId })
          // Stopped while the request was in flight: drop the result, do not touch state.
          if (pollSessionRef.current !== session)
            return
          setIndexingStatusDetail(data)
          if (data.every(isFinished))
            return
        }
        catch {
          // Treated as a transient failure; fall through and retry after the delay.
        }
        await sleep(INDEXING_STATUS_POLL_INTERVAL_MS)
      }
    }

    useEffect(() => {
      startQueryStatus()
      return () => {
        stopQueryStatus()
      }
      // The helpers are re-created on every render; restarting is only wanted when the batch changes.
      // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [datasetId, batchId])

    // Processing rule of the first document. `documents` may be empty, in which case the
    // `null` key tells SWR to skip the request instead of dereferencing `undefined`.
    const firstDocumentId = documents[0]?.id
    const { data: ruleDetail } = useSWR(
      firstDocumentId
        ? { action: 'fetchProcessRule', params: { documentId: firstDocumentId } }
        : null,
      apiParams => fetchProcessRule(omit(apiParams, 'action')),
      { revalidateOnFocus: false },
    )

    const navToDocumentList = () => {
      router.push(`/datasets/${datasetId}/documents`)
    }
    const navToApiDocs = () => {
      router.push('/datasets?category=api')
    }

    const isEmbedding = useMemo(() => {
      return indexingStatusBatchDetail.some(detail => EMBEDDING_STATUSES.includes(detail?.indexing_status || ''))
    }, [indexingStatusBatchDetail])

    // `every` is true for an empty array, so require at least one status entry;
    // otherwise "completed" would be displayed before the first response arrives.
    const isEmbeddingCompleted = useMemo(() => {
      return indexingStatusBatchDetail.length > 0
        && indexingStatusBatchDetail.every(detail => FINISHED_STATUSES.includes(detail?.indexing_status || ''))
    }, [indexingStatusBatchDetail])

    const findDocument = (id: string) => documents.find(document => document.id === id)

    // Extension is whatever follows the last dot; falls back to 'txt' when there is no name.
    const getFileType = (name?: string) => name?.split('.').pop() || 'txt'

    /** Completed/total segments as an integer percentage, clamped to 100; 0 when the total is unknown. */
    const getSourcePercent = (detail: IndexingStatusResponse) => {
      const completedCount = detail.completed_segments || 0
      const totalCount = detail.total_segments || 0
      if (totalCount === 0)
        return 0
      return Math.min(100, Math.round(completedCount * 100 / totalCount))
    }

    const isSourceEmbedding = (detail: IndexingStatusResponse) =>
      SOURCE_EMBEDDING_STATUSES.includes(detail.indexing_status || '')

    return (
      <>
        <div className="h-5 flex items-center mb-3">
          <div className="flex items-center justify-between text-gray-900 font-medium text-sm mr-2">
            {isEmbedding && <div className='flex items-center'>
              <RiLoader2Fill className='size-4 mr-1 animate-spin' />
              {t('datasetDocuments.embedding.processing')}
            </div>}
            {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
          </div>
        </div>
        {
          enableBilling && plan.type !== Plan.team && (
            <div className='flex items-center mb-3 p-3 h-14 bg-white border-[0.5px] border-black/5 shadow-md rounded-xl'>
              <div className='shrink-0 flex items-center justify-center w-8 h-8 bg-[#FFF6ED] rounded-lg'>
                <ZapFast className='w-4 h-4 text-[#FB6514]' />
              </div>
              <div className='grow mx-3 text-[13px] font-medium text-gray-700'>
                {t('billing.plansCommon.documentProcessingPriorityUpgrade')}
              </div>
              <UpgradeBtn loc='knowledge-speed-up' />
            </div>
          )
        }
        <div className="flex flex-col gap-0.5 pb-2">
          {indexingStatusBatchDetail.map((indexingStatusDetail) => {
            // Resolve the matching document once per row instead of once per attribute.
            const doc = findDocument(indexingStatusDetail.id)
            const sourceName = doc?.name
            const sourceType = doc?.data_source_type as DataSourceType
            const showProgress = isSourceEmbedding(indexingStatusDetail)
            const percent = getSourcePercent(indexingStatusDetail)
            return (
              <div key={indexingStatusDetail.id} className={cn(
                'relative h-[26px] bg-components-progress-bar-bg rounded-md overflow-hidden',
                indexingStatusDetail.indexing_status === 'error' && 'bg-state-destructive-hover-alt',
              )}>
                {showProgress && (
                  <div className="absolute top-0 left-0 h-full min-w-0.5 bg-components-progress-bar-progress border-r-[2px] border-r-components-progress-bar-progress-highlight" style={{ width: `${percent}%` }} />
                )}
                <div className="flex gap-1 pl-[6px] pr-2 h-full items-center z-[1]">
                  {sourceType === DataSourceType.FILE && (
                    <DocumentFileIcon
                      className="shrink-0 size-4"
                      name={sourceName}
                      extension={getFileType(sourceName)}
                    />
                  )}
                  {sourceType === DataSourceType.NOTION && (
                    <NotionIcon
                      className='shrink-0'
                      type='page'
                      src={doc?.data_source_info?.notion_page_icon}
                    />
                  )}
                  <div className="grow flex items-center gap-1 w-0" title={sourceName}>
                    <div className="text-xs truncate">
                      {sourceName}
                    </div>
                    {
                      enableBilling && (
                        <PriorityLabel className='ml-0' />
                      )
                    }
                  </div>
                  {showProgress && (
                    <div className="shrink-0 text-xs">{`${percent}%`}</div>
                  )}
                  {indexingStatusDetail.indexing_status === 'error' && (
                    <Tooltip
                      popupClassName='px-4 py-[14px] max-w-60 text-sm leading-4 text-text-secondary border-[0.5px] border-components-panel-border rounded-xl'
                      offset={4}
                      popupContent={indexingStatusDetail.error}
                    >
                      <span>
                        <RiErrorWarningFill className='shrink-0 size-4 text-text-destructive' />
                      </span>
                    </Tooltip>
                  )}
                  {indexingStatusDetail.indexing_status === 'completed' && (
                    <RiCheckboxCircleFill className='shrink-0 size-4 text-text-success' />
                  )}
                </div>
              </div>
            )
          })}
        </div>
        <hr className="my-3 h-[1px] bg-divider-subtle border-0" />
        <RuleDetail
          sourceData={ruleDetail}
          indexingType={indexingType}
          retrievalMethod={retrievalMethod}
        />
        <div className='flex items-center gap-2 my-10'>
          <Button className='w-fit' onClick={navToApiDocs}>
            <RiTerminalBoxLine className='size-4 mr-2' />
            {/* NOTE(review): hard-coded English label, not routed through t() — confirm whether a translation key exists */}
            <span>Access the API</span>
          </Button>
          <Button className='w-fit' variant='primary' onClick={navToDocumentList}>
            <span>{t('datasetCreation.stepThree.navTo')}</span>
            <ArrowRightIcon className='size-4 ml-2 stroke-current stroke-1' />
          </Button>
        </div>
      </>
    )
  }
  export default EmbeddingProcess