index.tsx 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177
  1. 'use client'
  2. import type { FC, PropsWithChildren } from 'react'
  3. import React, { useCallback, useEffect, useRef, useState } from 'react'
  4. import { useTranslation } from 'react-i18next'
  5. import { useContext } from 'use-context-selector'
  6. import {
  7. RiAlertFill,
  8. RiArrowLeftLine,
  9. RiSearchEyeLine,
  10. } from '@remixicon/react'
  11. import Link from 'next/link'
  12. import Image from 'next/image'
  13. import { useHover } from 'ahooks'
  14. import SettingCog from '../assets/setting-gear-mod.svg'
  15. import OrangeEffect from '../assets/option-card-effect-orange.svg'
  16. import FamilyMod from '../assets/family-mod.svg'
  17. import Note from '../assets/note-mod.svg'
  18. import FileList from '../assets/file-list-3-fill.svg'
  19. import { indexMethodIcon } from '../icons'
  20. import { PreviewContainer } from '../../preview/container'
  21. import { ChunkContainer, QAPreview } from '../../chunk'
  22. import { PreviewHeader } from '../../preview/header'
  23. import { FormattedText } from '../../formatted-text/formatted'
  24. import { PreviewSlice } from '../../formatted-text/flavours/preview-slice'
  25. import PreviewDocumentPicker from '../../common/document-picker/preview-document-picker'
  26. import s from './index.module.css'
  27. import unescape from './unescape'
  28. import escape from './escape'
  29. import { OptionCard } from './option-card'
  30. import LanguageSelect from './language-select'
  31. import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
  32. import cn from '@/utils/classnames'
  33. import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
  34. import Button from '@/app/components/base/button'
  35. import FloatRightContainer from '@/app/components/base/float-right-container'
  36. import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
  37. import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
  38. import { type RetrievalConfig } from '@/types/app'
  39. import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
  40. import Toast from '@/app/components/base/toast'
  41. import type { NotionPage } from '@/models/common'
  42. import { DataSourceProvider } from '@/models/common'
  43. import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets'
  44. import { useDatasetDetailContext } from '@/context/dataset-detail'
  45. import I18n from '@/context/i18n'
  46. import { RETRIEVE_METHOD } from '@/types/app'
  47. import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
  48. import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
  49. import { LanguagesSupported } from '@/i18n/language'
  50. import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
  51. import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
  52. import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
  53. import Checkbox from '@/app/components/base/checkbox'
  54. import RadioCard from '@/app/components/base/radio-card'
  55. import { IS_CE_EDITION } from '@/config'
  56. import Divider from '@/app/components/base/divider'
  57. import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/knowledge/use-create-dataset'
  58. import Badge from '@/app/components/base/badge'
  59. import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
  60. import Tooltip from '@/app/components/base/tooltip'
  61. import CustomDialog from '@/app/components/base/dialog'
  62. import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem'
  63. import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
  /**
   * Semibold secondary-colored label used as a section heading inside the
   * option cards. Renders its children inside a `<label>` element.
   */
  const TextLabel: FC<PropsWithChildren> = ({ children }) => (
    <label className='text-text-secondary system-sm-semibold'>{children}</label>
  )
  /** Props accepted by the `StepTwo` component. */
  type StepTwoProps = {
    // --- mode / context ---
    /** When true, rules are read from `documentDetail` instead of fetching the defaults, and `onSave` is invoked after a successful save. */
    isSetting?: boolean
    /** Existing document; together with `datasetId` it seeds the preview item, doc form, language and rules. */
    documentDetail?: FullDocumentDetail
    /** Without an explicit `indexingType`, decides whether the index type starts as high quality or economical. */
    isAPIKeySet: boolean
    /** When absent, the first document of a new dataset is created. */
    datasetId?: string
    /** Indexing technique imposed from outside; takes precedence over the locally selected one. */
    indexingType?: IndexingType
    // NOTE(review): not read in the visible part of the component — confirm usage.
    retrievalMethod?: string

    // --- data source ---
    /** Data source type used while on the create page. */
    dataSourceType: DataSourceType
    files: CustomFile[]
    notionPages?: NotionPage[]
    websitePages?: CrawlResultItem[]
    crawlOptions?: CrawlOptions
    websiteCrawlProvider?: DataSourceProvider
    websiteCrawlJobId?: string

    // --- callbacks ---
    // NOTE(review): not called in the visible part of the component — confirm usage.
    onSetting: () => void
    /** Called with `+1` after a document has been created or saved. */
    onStepChange?: (delta: number) => void
    /** Receives the index type once creation succeeds. */
    updateIndexingTypeCache?: (type: string) => void
    /** Receives the retrieval search method once the first document of a dataset is created. */
    updateRetrievalMethodCache?: (method: string) => void
    /** Receives the creation response once creation succeeds. */
    updateResultCache?: (res: createDocumentResponse) => void
    /** Called after a successful save when `isSetting` is true. */
    onSave?: () => void
    // NOTE(review): not called in the visible part of the component — confirm usage.
    onCancel?: () => void
  }
  /**
   * Segmentation modes. The selected value is sent as `mode` of the process
   * rule when general (non parent-child) chunking is used.
   */
  export enum SegmentType {
    AUTO = 'automatic',
    CUSTOM = 'custom',
  }
  /**
   * Index methods. The selected value is sent as `indexing_technique` in the
   * document creation parameters.
   */
  export enum IndexingType {
    QUALIFIED = 'high_quality',
    ECONOMICAL = 'economy',
  }
  // Default delimiter, kept in escaped form (literal backslash + "n", twice);
  // it is passed through `unescape` before being sent as the separator.
  const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
  // Initial maximum chunk length. The identifier's spelling ("MAXMIMUM") is
  // kept as-is because other code in this file refers to it by this name.
  const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500
  // Initial chunk overlap.
  const DEFAULT_OVERLAP = 50
  /** Form state of the parent-child chunking option card. */
  type ParentChildConfig = {
    /** Parent mode; sent as `parent_mode` of the process rule. */
    chunkForContext: ParentMode
    /** Parent chunk settings; sent as `segmentation` of the process rule. */
    parent: {
      /** Delimiter in escaped form (unescaped before being sent). */
      delimiter: string
      maxLength: number
    }
    /** Child chunk settings; sent as `subchunk_segmentation` of the process rule. */
    child: {
      /** Delimiter in escaped form (unescaped before being sent). */
      delimiter: string
      maxLength: number
    }
  }
  // Initial parent-child settings; also the values restored by the reset action.
  // Delimiters are stored in escaped form (literal backslash + "n").
  const defaultParentChildConfig: ParentChildConfig = {
    chunkForContext: 'paragraph',
    parent: { delimiter: '\\n\\n', maxLength: 500 },
    child: { delimiter: '\\n', maxLength: 200 },
  }
  122. const StepTwo = ({
  123. isSetting,
  124. documentDetail,
  125. isAPIKeySet,
  126. onSetting,
  127. datasetId,
  128. indexingType,
  129. dataSourceType: inCreatePageDataSourceType,
  130. files,
  131. notionPages = [],
  132. websitePages = [],
  133. crawlOptions,
  134. websiteCrawlProvider = DataSourceProvider.fireCrawl,
  135. websiteCrawlJobId = '',
  136. onStepChange,
  137. updateIndexingTypeCache,
  138. updateResultCache,
  139. onSave,
  140. onCancel,
  141. updateRetrievalMethodCache,
  142. }: StepTwoProps) => {
  143. const { t } = useTranslation()
  144. const { locale } = useContext(I18n)
  145. const media = useBreakpoints()
  146. const isMobile = media === MediaType.mobile
  147. const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext()
  148. const isInUpload = Boolean(currentDataset)
  149. const isUploadInEmptyDataset = isInUpload && !currentDataset?.doc_form
  150. const isNotUploadInEmptyDataset = !isUploadInEmptyDataset
  151. const isInInit = !isInUpload && !isSetting
  152. const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
  153. const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
  154. const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.CUSTOM)
  155. const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
  // Stores the delimiter in escaped form. An empty value falls back to the
  // default delimiter unless the caller explicitly allows an empty string.
  const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
    if (value) {
      doSetSegmentIdentifier(escape(value))
      return
    }
    doSetSegmentIdentifier(canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER)
  }, [])
  159. const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length
  160. const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
  161. const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
  162. const [rules, setRules] = useState<PreProcessingRule[]>([])
  163. const [defaultConfig, setDefaultConfig] = useState<Rules>()
  164. const hasSetIndexType = !!indexingType
  // Initial index type: an explicitly provided `indexingType` wins; otherwise
  // high quality when an API key is set and economical when it is not.
  // Previously any truthy `indexingType` (including 'economy') initialised the
  // state to high quality, which disagreed with the effect that later syncs
  // the state from `indexingType` / `isAPIKeySet` and produced a wrong first render.
  const [indexType, setIndexType] = useState<IndexingType>(
    indexingType
    || (isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL),
  )
  171. const [previewFile, setPreviewFile] = useState<DocumentItem>(
  172. (datasetId && documentDetail)
  173. ? documentDetail.file
  174. : files[0],
  175. )
  176. const [previewNotionPage, setPreviewNotionPage] = useState<NotionPage>(
  177. (datasetId && documentDetail)
  178. ? documentDetail.notion_page
  179. : notionPages[0],
  180. )
  181. const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(
  182. (datasetId && documentDetail)
  183. ? documentDetail.website_page
  184. : websitePages[0],
  185. )
  186. // QA Related
  187. const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false)
  188. const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false)
  189. const [docForm, setDocForm] = useState<ChunkingMode>(
  190. (datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text,
  191. )
  /**
   * Switches the chunking mode.
   *
   * - QA under the economical index is not applied directly: the confirmation
   *   dialog is opened and the current mode is left untouched.
   * - Parent-child under the economical index also switches the index type to
   *   high quality.
   * In every applied case the current estimate mutation is reset.
   */
  const handleChangeDocform = (value: ChunkingMode) => {
    const isEconomical = indexType === IndexingType.ECONOMICAL

    if (isEconomical && value === ChunkingMode.qa) {
      setIsQAConfirmDialogOpen(true)
      return
    }

    if (isEconomical && value === ChunkingMode.parentChild)
      setIndexType(IndexingType.QUALIFIED)

    setDocForm(value)
    // eslint-disable-next-line @typescript-eslint/no-use-before-define
    currentEstimateMutation.reset()
  }
  203. const [docLanguage, setDocLanguage] = useState<string>(
  204. (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'),
  205. )
  206. const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
  207. const getIndexing_technique = () => indexingType || indexType
  208. const currentDocForm = currentDataset?.doc_form || docForm
  /**
   * Builds the process rule from the current form state.
   *
   * Parent-child chunking yields a 'hierarchical' rule with parent and child
   * segmentation; every other doc form yields a rule whose mode is the
   * selected segmentation type. Delimiters are unescaped before being sent.
   */
  const getProcessRule = (): ProcessRule => {
    if (currentDocForm !== ChunkingMode.parentChild) {
      return {
        rules: {
          pre_processing_rules: rules,
          segmentation: {
            separator: unescape(segmentIdentifier),
            max_tokens: maxChunkLength,
            chunk_overlap: overlap,
          },
        }, // api will check this. It will be removed after api refactored.
        mode: segmentationType,
      } as ProcessRule
    }

    const { parent, child, chunkForContext } = parentChildConfig
    return {
      rules: {
        pre_processing_rules: rules,
        segmentation: {
          separator: unescape(parent.delimiter),
          max_tokens: parent.maxLength,
        },
        parent_mode: chunkForContext,
        subchunk_segmentation: {
          separator: unescape(child.delimiter),
          max_tokens: child.maxLength,
        },
      },
      mode: 'hierarchical',
    } as ProcessRule
  }
  241. const fileIndexingEstimateQuery = useFetchFileIndexingEstimateForFile({
  242. docForm: currentDocForm,
  243. docLanguage,
  244. dataSourceType: DataSourceType.FILE,
  245. files: previewFile
  246. ? [files.find(file => file.name === previewFile.name)!]
  247. : files,
  248. indexingTechnique: getIndexing_technique() as any,
  249. processRule: getProcessRule(),
  250. dataset_id: datasetId!,
  251. })
  252. const notionIndexingEstimateQuery = useFetchFileIndexingEstimateForNotion({
  253. docForm: currentDocForm,
  254. docLanguage,
  255. dataSourceType: DataSourceType.NOTION,
  256. notionPages: [previewNotionPage],
  257. indexingTechnique: getIndexing_technique() as any,
  258. processRule: getProcessRule(),
  259. dataset_id: datasetId || '',
  260. })
  261. const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({
  262. docForm: currentDocForm,
  263. docLanguage,
  264. dataSourceType: DataSourceType.WEB,
  265. websitePages: [previewWebsitePage],
  266. crawlOptions,
  267. websiteCrawlProvider,
  268. websiteCrawlJobId,
  269. indexingTechnique: getIndexing_technique() as any,
  270. processRule: getProcessRule(),
  271. dataset_id: datasetId || '',
  272. })
  273. const currentEstimateMutation = dataSourceType === DataSourceType.FILE
  274. ? fileIndexingEstimateQuery
  275. : dataSourceType === DataSourceType.NOTION
  276. ? notionIndexingEstimateQuery
  277. : websiteIndexingEstimateQuery
  278. const fetchEstimate = useCallback(() => {
  279. if (dataSourceType === DataSourceType.FILE)
  280. fileIndexingEstimateQuery.mutate()
  281. if (dataSourceType === DataSourceType.NOTION)
  282. notionIndexingEstimateQuery.mutate()
  283. if (dataSourceType === DataSourceType.WEB)
  284. websiteIndexingEstimateQuery.mutate()
  285. }, [dataSourceType, fileIndexingEstimateQuery, notionIndexingEstimateQuery, websiteIndexingEstimateQuery])
  286. const estimate
  287. = dataSourceType === DataSourceType.FILE
  288. ? fileIndexingEstimateQuery.data
  289. : dataSourceType === DataSourceType.NOTION
  290. ? notionIndexingEstimateQuery.data
  291. : websiteIndexingEstimateQuery.data
  /**
   * Returns the translated label for a pre-processing rule id, or `undefined`
   * when the id is not one of the known rules.
   */
  const getRuleName = (key: string) => {
    switch (key) {
      case 'remove_extra_spaces':
        return t('datasetCreation.stepTwo.removeExtraSpaces')
      case 'remove_urls_emails':
        return t('datasetCreation.stepTwo.removeUrlEmails')
      case 'remove_stopwords':
        return t('datasetCreation.stepTwo.removeStopwords')
      default:
        return undefined
    }
  }
  /** Toggles the `enabled` flag of the rule with the given id; other rules are kept as they are. */
  const ruleChangeHandle = (id: string) => {
    setRules(rules.map(rule => (
      rule.id === id
        ? { id: rule.id, enabled: !rule.enabled }
        : rule
    )))
  }
  /**
   * Restores the segmentation inputs and pre-processing rules from the stored
   * default config (when one has been loaded) and resets the parent-child
   * settings to their defaults.
   */
  const resetRules = () => {
    if (defaultConfig) {
      const { segmentation, pre_processing_rules: defaultRules } = defaultConfig
      setSegmentIdentifier(segmentation.separator)
      setMaxChunkLength(segmentation.max_tokens)
      setOverlap(segmentation.chunk_overlap!)
      setRules(defaultRules)
    }
    setParentChildConfig(defaultParentChildConfig)
  }
  /**
   * Requests a fresh chunk preview (indexing estimate).
   *
   * For custom segmentation the maximum chunk length is validated first,
   * against the same `limitMaxChunkLength` that `getCreationParams` enforces
   * (it starts at 4000 and is replaced by the limit returned with the default
   * process rule). The limit is also passed to the translation so the error
   * message receives the same `limit` value as on creation.
   */
  const updatePreview = () => {
    if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
      Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
      return
    }
    fetchEstimate()
  }
  328. const {
  329. modelList: rerankModelList,
  330. defaultModel: rerankDefaultModel,
  331. currentModel: isRerankDefaultModelValid,
  332. } = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
  333. const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
  334. const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
  335. const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>(
  336. currentDataset?.embedding_model
  337. ? {
  338. provider: currentDataset.embedding_model_provider,
  339. model: currentDataset.embedding_model,
  340. }
  341. : {
  342. provider: defaultEmbeddingModel?.provider.provider || '',
  343. model: defaultEmbeddingModel?.model || '',
  344. },
  345. )
  /**
   * Validates the form and builds the request body for creating or saving a
   * document.
   *
   * Returns `undefined` (after showing an error toast) when validation fails:
   * overlap larger than the max chunk length, max chunk length above the
   * limit, or — in create mode — a missing rerank model.
   */
  const getCreationParams = () => {
    // Length checks only apply to custom segmentation.
    if (segmentationType === SegmentType.CUSTOM) {
      if (overlap > maxChunkLength) {
        Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
        return
      }
      if (maxChunkLength > limitMaxChunkLength) {
        Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
        return
      }
    }

    // Settings mode: re-process an existing document.
    if (isSetting) {
      return {
        original_document_id: documentDetail?.id,
        doc_form: currentDocForm,
        doc_language: docLanguage,
        process_rule: getProcessRule(),
        // eslint-disable-next-line @typescript-eslint/no-use-before-define
        retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
        embedding_model: embeddingModel.model, // Readonly
        embedding_model_provider: embeddingModel.provider, // Readonly
        indexing_technique: getIndexing_technique(),
      } as CreateDocumentReq
    }

    // Create mode.
    const indexMethod = getIndexing_technique()
    const hasRerankModel = isReRankModelSelected({
      rerankDefaultModel,
      isRerankDefaultModelValid: !!isRerankDefaultModelValid,
      rerankModelList,
      // eslint-disable-next-line @typescript-eslint/no-use-before-define
      retrievalConfig,
      indexMethod: indexMethod as string,
    })
    if (!hasRerankModel) {
      Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
      return
    }

    const postRetrievalConfig = ensureRerankModelSelected({
      rerankDefaultModel: rerankDefaultModel!,
      retrievalConfig: {
        // eslint-disable-next-line @typescript-eslint/no-use-before-define
        ...retrievalConfig,
        // eslint-disable-next-line @typescript-eslint/no-use-before-define
        reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
      },
      indexMethod: indexMethod as string,
    })

    const params = {
      data_source: {
        type: dataSourceType,
        info_list: {
          data_source_type: dataSourceType,
        },
      },
      indexing_technique: indexMethod,
      process_rule: getProcessRule(),
      doc_form: currentDocForm,
      doc_language: docLanguage,
      retrieval_model: postRetrievalConfig,
      embedding_model: embeddingModel.model,
      embedding_model_provider: embeddingModel.provider,
    } as CreateDocumentReq

    // Attach the source-specific payload.
    const infoList = params.data_source.info_list
    if (dataSourceType === DataSourceType.FILE) {
      infoList.file_info_list = {
        file_ids: files.map(file => file.id || '').filter(Boolean),
      }
    }
    if (dataSourceType === DataSourceType.NOTION)
      infoList.notion_info_list = getNotionInfo(notionPages)
    if (dataSourceType === DataSourceType.WEB) {
      infoList.website_info_list = getWebsiteInfo({
        websiteCrawlProvider,
        websiteCrawlJobId,
        websitePages,
      })
    }

    return params
  }
  426. const fetchDefaultProcessRuleMutation = useFetchDefaultProcessRule({
  427. onSuccess(data) {
  428. const separator = data.rules.segmentation.separator
  429. setSegmentIdentifier(separator)
  430. setMaxChunkLength(data.rules.segmentation.max_tokens)
  431. setOverlap(data.rules.segmentation.chunk_overlap!)
  432. setRules(data.rules.pre_processing_rules)
  433. setDefaultConfig(data.rules)
  434. setLimitMaxChunkLength(data.limits.indexing_max_segmentation_tokens_length)
  435. },
  436. onError(error) {
  437. Toast.notify({
  438. type: 'error',
  439. message: `${error}`,
  440. })
  441. },
  442. })
  /**
   * Seeds the segmentation inputs, the pre-processing rules and the default
   * config from the process rule of `documentDetail`. Does nothing when no
   * document detail is available.
   */
  const getRulesFromDetail = () => {
    if (!documentDetail)
      return

    const detailRules = documentDetail.dataset_process_rule.rules
    const { separator, max_tokens: maxTokens, chunk_overlap: chunkOverlap } = detailRules.segmentation
    setSegmentIdentifier(separator)
    setMaxChunkLength(maxTokens)
    setOverlap(chunkOverlap!)
    setRules(detailRules.pre_processing_rules)
    setDefaultConfig(detailRules)
  }
  /** Sets the segmentation type from the process rule mode of `documentDetail`, when present. */
  const getDefaultMode = () => {
    if (!documentDetail)
      return
    // @ts-expect-error fix after api refactored
    setSegmentationType(documentDetail.dataset_process_rule.mode)
  }
  461. const createFirstDocumentMutation = useCreateFirstDocument({
  462. onError(error) {
  463. Toast.notify({
  464. type: 'error',
  465. message: `${error}`,
  466. })
  467. },
  468. })
  469. const createDocumentMutation = useCreateDocument(datasetId!, {
  470. onError(error) {
  471. Toast.notify({
  472. type: 'error',
  473. message: `${error}`,
  474. })
  475. },
  476. })
  477. const isCreating = createFirstDocumentMutation.isPending || createDocumentMutation.isPending
  /**
   * Creates (or saves) the document with the current settings.
   *
   * Resolves to `false` when the parameters fail validation. Otherwise it
   * awaits the matching mutation — "create first document" when there is no
   * `datasetId`, "create document" when there is — then refreshes the dataset
   * detail, advances one step and, in settings mode, calls `onSave`.
   */
  const createHandle = async () => {
    const params = getCreationParams()
    if (!params)
      return false

    if (datasetId) {
      await createDocumentMutation.mutateAsync(params, {
        onSuccess(data) {
          updateIndexingTypeCache?.(indexType as string)
          updateResultCache?.(data)
        },
      })
    }
    else {
      await createFirstDocumentMutation.mutateAsync(params, {
        onSuccess(data) {
          updateIndexingTypeCache?.(indexType as string)
          updateResultCache?.(data)
          // eslint-disable-next-line @typescript-eslint/no-use-before-define
          updateRetrievalMethodCache?.(retrievalConfig.search_method as string)
        },
      })
    }

    mutateDatasetRes?.()
    onStepChange?.(+1)
    if (isSetting)
      onSave?.()
  }
  // Switches to the economical index, but only for plain text chunking and
  // only when no indexing type was supplied through props.
  const changeToEconomicalType = () => {
    const canSwitch = docForm === ChunkingMode.text && !hasSetIndexType
    if (canSwitch)
      setIndexType(IndexingType.ECONOMICAL)
  }
  514. useEffect(() => {
  515. // fetch rules
  516. if (!isSetting) {
  517. fetchDefaultProcessRuleMutation.mutate('/datasets/process-rule')
  518. }
  519. else {
  520. getRulesFromDetail()
  521. getDefaultMode()
  522. }
  523. // eslint-disable-next-line react-hooks/exhaustive-deps
  524. }, [])
  525. useEffect(() => {
  526. // get indexing type by props
  527. if (indexingType)
  528. setIndexType(indexingType as IndexingType)
  529. else
  530. setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
  531. }, [isAPIKeySet, indexingType, datasetId])
  532. const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
  533. search_method: RETRIEVE_METHOD.semantic,
  534. reranking_enable: false,
  535. reranking_model: {
  536. reranking_provider_name: rerankDefaultModel?.provider.provider,
  537. reranking_model_name: rerankDefaultModel?.model,
  538. },
  539. top_k: 3,
  540. score_threshold_enabled: false,
  541. score_threshold: 0.5,
  542. } as RetrievalConfig)
  543. const economyDomRef = useRef<HTMLDivElement>(null)
  544. const isHoveringEconomy = useHover(economyDomRef)
  545. return (
  546. <div className='flex w-full h-full'>
  547. <div className={cn('relative h-full w-1/2 py-6 overflow-y-auto', isMobile ? 'px-4' : 'px-12')}>
  548. <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.segmentation')}</div>
  549. {((isInUpload && [ChunkingMode.text, ChunkingMode.qa].includes(currentDataset!.doc_form))
  550. || isUploadInEmptyDataset
  551. || isInInit)
  552. && <OptionCard
  553. className='bg-background-section mb-2'
  554. title={t('datasetCreation.stepTwo.general')}
  555. icon={<Image width={20} height={20} src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />}
  556. activeHeaderClassName='bg-dataset-option-card-blue-gradient'
  557. description={t('datasetCreation.stepTwo.generalTip')}
  558. isActive={
  559. [ChunkingMode.text, ChunkingMode.qa].includes(currentDocForm)
  560. }
  561. onSwitched={() =>
  562. handleChangeDocform(ChunkingMode.text)
  563. }
  564. actions={
  565. <>
  566. <Button variant={'secondary-accent'} onClick={() => updatePreview()}>
  567. <RiSearchEyeLine className='h-4 w-4 mr-0.5' />
  568. {t('datasetCreation.stepTwo.previewChunk')}
  569. </Button>
  570. <Button variant={'ghost'} onClick={resetRules}>
  571. {t('datasetCreation.stepTwo.reset')}
  572. </Button>
  573. </>
  574. }
  575. noHighlight={isInUpload && isNotUploadInEmptyDataset}
  576. >
  577. <div className='flex flex-col gap-y-4'>
  578. <div className='flex gap-3'>
  579. <DelimiterInput
  580. value={segmentIdentifier}
  581. onChange={e => setSegmentIdentifier(e.target.value, true)}
  582. />
  583. <MaxLengthInput
  584. unit='tokens'
  585. value={maxChunkLength}
  586. onChange={setMaxChunkLength}
  587. />
  588. <OverlapInput
  589. unit='tokens'
  590. value={overlap}
  591. min={1}
  592. onChange={setOverlap}
  593. />
  594. </div>
  595. <div className='w-full flex flex-col'>
  596. <div className='flex items-center gap-x-2'>
  597. <div className='inline-flex shrink-0'>
  598. <TextLabel>{t('datasetCreation.stepTwo.rules')}</TextLabel>
  599. </div>
  600. <Divider className='grow' bgStyle='gradient' />
  601. </div>
  602. <div className='mt-1'>
  603. {rules.map(rule => (
  604. <div key={rule.id} className={s.ruleItem} onClick={() => {
  605. ruleChangeHandle(rule.id)
  606. }}>
  607. <Checkbox
  608. checked={rule.enabled}
  609. />
  610. <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
  611. </div>
  612. ))}
  613. {IS_CE_EDITION && <>
  614. <Divider type='horizontal' className='my-4 bg-divider-subtle' />
  615. <div className='flex items-center py-0.5'>
  616. <div className='flex items-center' onClick={() => {
  617. if (currentDataset?.doc_form)
  618. return
  619. if (docForm === ChunkingMode.qa)
  620. handleChangeDocform(ChunkingMode.text)
  621. else
  622. handleChangeDocform(ChunkingMode.qa)
  623. }}>
  624. <Checkbox
  625. checked={currentDocForm === ChunkingMode.qa}
  626. disabled={!!currentDataset?.doc_form}
  627. />
  628. <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">
  629. {t('datasetCreation.stepTwo.useQALanguage')}
  630. </label>
  631. </div>
  632. <LanguageSelect
  633. currentLanguage={docLanguage || locale}
  634. onSelect={setDocLanguage}
  635. disabled={currentDocForm !== ChunkingMode.qa}
  636. />
  637. <Tooltip popupContent={t('datasetCreation.stepTwo.QATip')} />
  638. </div>
  639. {currentDocForm === ChunkingMode.qa && (
  640. <div
  641. style={{
  642. background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)',
  643. }}
  644. className='h-10 mt-2 flex items-center gap-2 rounded-xl backdrop-blur-[5px] border-components-panel-border border shadow-xs px-3 text-xs'
  645. >
  646. <RiAlertFill className='size-4 text-text-warning-secondary' />
  647. <span className='system-xs-medium text-text-primary'>
  648. {t('datasetCreation.stepTwo.QATip')}
  649. </span>
  650. </div>
  651. )}
  652. </>}
  653. </div>
  654. </div>
  655. </div>
  656. </OptionCard>}
  657. {
  658. (
  659. (isInUpload && currentDataset!.doc_form === ChunkingMode.parentChild)
  660. || isUploadInEmptyDataset
  661. || isInInit
  662. )
  663. && <OptionCard
  664. title={t('datasetCreation.stepTwo.parentChild')}
  665. icon={<Image width={20} height={20} src={FamilyMod} alt={t('datasetCreation.stepTwo.parentChild')} />}
  666. effectImg={OrangeEffect.src}
  667. activeHeaderClassName='bg-dataset-option-card-orange-gradient'
  668. description={t('datasetCreation.stepTwo.parentChildTip')}
  669. isActive={currentDocForm === ChunkingMode.parentChild}
  670. onSwitched={() => handleChangeDocform(ChunkingMode.parentChild)}
  671. actions={
  672. <>
  673. <Button variant={'secondary-accent'} onClick={() => updatePreview()}>
  674. <RiSearchEyeLine className='h-4 w-4 mr-0.5' />
  675. {t('datasetCreation.stepTwo.previewChunk')}
  676. </Button>
  677. <Button variant={'ghost'} onClick={resetRules}>
  678. {t('datasetCreation.stepTwo.reset')}
  679. </Button>
  680. </>
  681. }
  682. noHighlight={isInUpload && isNotUploadInEmptyDataset}
  683. >
  684. <div className='flex flex-col gap-4'>
  685. <div>
  686. <div className='flex items-center gap-x-2'>
  687. <div className='inline-flex shrink-0'>
  688. <TextLabel>{t('datasetCreation.stepTwo.parentChunkForContext')}</TextLabel>
  689. </div>
  690. <Divider className='grow' bgStyle='gradient' />
  691. </div>
  692. <RadioCard className='mt-1'
  693. icon={<Image src={Note} alt='' />}
  694. title={t('datasetCreation.stepTwo.paragraph')}
  695. description={t('datasetCreation.stepTwo.paragraphTip')}
  696. isChosen={parentChildConfig.chunkForContext === 'paragraph'}
  697. onChosen={() => setParentChildConfig(
  698. {
  699. ...parentChildConfig,
  700. chunkForContext: 'paragraph',
  701. },
  702. )}
  703. chosenConfig={
  704. <div className='flex gap-3'>
  705. <DelimiterInput
  706. value={parentChildConfig.parent.delimiter}
  707. tooltip={t('datasetCreation.stepTwo.parentChildDelimiterTip')!}
  708. onChange={e => setParentChildConfig({
  709. ...parentChildConfig,
  710. parent: {
  711. ...parentChildConfig.parent,
  712. delimiter: e.target.value ? escape(e.target.value) : '',
  713. },
  714. })}
  715. />
  716. <MaxLengthInput
  717. unit='tokens'
  718. value={parentChildConfig.parent.maxLength}
  719. onChange={value => setParentChildConfig({
  720. ...parentChildConfig,
  721. parent: {
  722. ...parentChildConfig.parent,
  723. maxLength: value,
  724. },
  725. })}
  726. />
  727. </div>
  728. }
  729. />
  730. <RadioCard className='mt-2'
  731. icon={<Image src={FileList} alt='' />}
  732. title={t('datasetCreation.stepTwo.fullDoc')}
  733. description={t('datasetCreation.stepTwo.fullDocTip')}
  734. onChosen={() => setParentChildConfig(
  735. {
  736. ...parentChildConfig,
  737. chunkForContext: 'full-doc',
  738. },
  739. )}
  740. isChosen={parentChildConfig.chunkForContext === 'full-doc'}
  741. />
  742. </div>
  743. <div>
  744. <div className='flex items-center gap-x-2'>
  745. <div className='inline-flex shrink-0'>
  746. <TextLabel>{t('datasetCreation.stepTwo.childChunkForRetrieval')}</TextLabel>
  747. </div>
  748. <Divider className='grow' bgStyle='gradient' />
  749. </div>
  750. <div className='flex gap-3 mt-1'>
  751. <DelimiterInput
  752. value={parentChildConfig.child.delimiter}
  753. tooltip={t('datasetCreation.stepTwo.parentChildChunkDelimiterTip')!}
  754. onChange={e => setParentChildConfig({
  755. ...parentChildConfig,
  756. child: {
  757. ...parentChildConfig.child,
  758. delimiter: e.target.value ? escape(e.target.value) : '',
  759. },
  760. })}
  761. />
  762. <MaxLengthInput
  763. unit='tokens'
  764. value={parentChildConfig.child.maxLength}
  765. onChange={value => setParentChildConfig({
  766. ...parentChildConfig,
  767. child: {
  768. ...parentChildConfig.child,
  769. maxLength: value,
  770. },
  771. })}
  772. />
  773. </div>
  774. </div>
  775. <div>
  776. <div className='flex items-center gap-x-2'>
  777. <div className='inline-flex shrink-0'>
  778. <TextLabel>{t('datasetCreation.stepTwo.rules')}</TextLabel>
  779. </div>
  780. <Divider className='grow' bgStyle='gradient' />
  781. </div>
  782. <div className='mt-1'>
  783. {rules.map(rule => (
  784. <div key={rule.id} className={s.ruleItem} onClick={() => {
  785. ruleChangeHandle(rule.id)
  786. }}>
  787. <Checkbox
  788. checked={rule.enabled}
  789. />
  790. <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
  791. </div>
  792. ))}
  793. </div>
  794. </div>
  795. </div>
  796. </OptionCard>}
  797. <Divider className='my-5' />
  798. <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.indexMode')}</div>
  799. <div className='flex items-center gap-2'>
  800. {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && (
  801. <OptionCard className='flex-1'
  802. title={<div className='flex items-center'>
  803. {t('datasetCreation.stepTwo.qualified')}
  804. <Badge className={cn('ml-1 h-[18px]', (!hasSetIndexType && indexType === IndexingType.QUALIFIED) ? 'border-text-accent-secondary text-text-accent-secondary' : '')} uppercase>
  805. {t('datasetCreation.stepTwo.recommend')}
  806. </Badge>
  807. <span className='ml-auto'>
  808. {!hasSetIndexType && <span className={cn(s.radio)} />}
  809. </span>
  810. </div>}
  811. description={t('datasetCreation.stepTwo.qualifiedTip')}
  812. icon={<Image src={indexMethodIcon.high_quality} alt='' />}
  813. isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED}
  814. disabled={!isAPIKeySet || hasSetIndexType}
  815. onSwitched={() => {
  816. if (isAPIKeySet)
  817. setIndexType(IndexingType.QUALIFIED)
  818. }}
  819. />
  820. )}
  821. {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && (
  822. <>
  823. <CustomDialog show={isQAConfirmDialogOpen} onClose={() => setIsQAConfirmDialogOpen(false)} className='w-[432px]'>
  824. <header className='pt-6 mb-4'>
  825. <h2 className='text-lg font-semibold'>
  826. {t('datasetCreation.stepTwo.qaSwitchHighQualityTipTitle')}
  827. </h2>
  828. <p className='font-normal text-sm mt-2'>
  829. {t('datasetCreation.stepTwo.qaSwitchHighQualityTipContent')}
  830. </p>
  831. </header>
  832. <div className='flex gap-2 pb-6'>
  833. <Button className='ml-auto' onClick={() => {
  834. setIsQAConfirmDialogOpen(false)
  835. }}>
  836. {t('datasetCreation.stepTwo.cancel')}
  837. </Button>
  838. <Button variant={'primary'} onClick={() => {
  839. setIsQAConfirmDialogOpen(false)
  840. setIndexType(IndexingType.QUALIFIED)
  841. setDocForm(ChunkingMode.qa)
  842. }}>
  843. {t('datasetCreation.stepTwo.switch')}
  844. </Button>
  845. </div>
  846. </CustomDialog>
  847. <PortalToFollowElem
  848. open={
  849. isHoveringEconomy && docForm !== ChunkingMode.text
  850. }
  851. placement={'top'}
  852. >
  853. <PortalToFollowElemTrigger asChild>
  854. <OptionCard className='flex-1'
  855. title={t('datasetCreation.stepTwo.economical')}
  856. description={t('datasetCreation.stepTwo.economicalTip')}
  857. icon={<Image src={indexMethodIcon.economical} alt='' />}
  858. isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
  859. disabled={!isAPIKeySet || hasSetIndexType || docForm !== ChunkingMode.text}
  860. ref={economyDomRef}
  861. onSwitched={() => {
  862. if (isAPIKeySet && docForm === ChunkingMode.text)
  863. setIndexType(IndexingType.ECONOMICAL)
  864. }}
  865. />
  866. </PortalToFollowElemTrigger>
  867. <PortalToFollowElemContent>
  868. <div className='p-3 bg-components-tooltip-bg border-components-panel-border text-xs font-medium text-text-secondary rounded-lg shadow-lg'>
  869. {
  870. docForm === ChunkingMode.qa
  871. ? t('datasetCreation.stepTwo.notAvailableForQA')
  872. : t('datasetCreation.stepTwo.notAvailableForParentChild')
  873. }
  874. </div>
  875. </PortalToFollowElemContent>
  876. </PortalToFollowElem>
  877. </>)}
  878. </div>
  879. {!hasSetIndexType && indexType === IndexingType.QUALIFIED && (
  880. <div className='mt-2 h-10 p-2 flex items-center gap-x-0.5 rounded-xl border-[0.5px] border-components-panel-border overflow-hidden bg-components-panel-bg-blur backdrop-blur-[5px] shadow-xs'>
  881. <div className='absolute top-0 left-0 right-0 bottom-0 bg-[linear-gradient(92deg,rgba(247,144,9,0.25)_0%,rgba(255,255,255,0.00)_100%)] opacity-40'></div>
  882. <div className='p-1'>
  883. <AlertTriangle className='size-4 text-text-warning-secondary' />
  884. </div>
  885. <span className='system-xs-medium'>{t('datasetCreation.stepTwo.highQualityTip')}</span>
  886. </div>
  887. )}
  888. {hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
  889. <div className='mt-2 system-xs-medium'>
  890. {t('datasetCreation.stepTwo.indexSettingTip')}
  891. <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
  892. </div>
  893. )}
  894. {/* Embedding model */}
  895. {indexType === IndexingType.QUALIFIED && (
  896. <div className='mt-5'>
  897. <div className={cn('system-md-semibold mb-1', datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
  898. <ModelSelector
  899. readonly={!!datasetId}
  900. defaultModel={embeddingModel}
  901. modelList={embeddingModelList}
  902. onSelect={(model: DefaultModel) => {
  903. setEmbeddingModel(model)
  904. }}
  905. />
  906. {!!datasetId && (
  907. <div className='mt-2 system-xs-medium'>
  908. {t('datasetCreation.stepTwo.indexSettingTip')}
  909. <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
  910. </div>
  911. )}
  912. </div>
  913. )}
  914. <Divider className='my-5' />
  915. {/* Retrieval Method Config */}
  916. <div>
  917. {!datasetId
  918. ? (
  919. <div className={'mb-1'}>
  920. <div className='system-md-semibold mb-0.5'>{t('datasetSettings.form.retrievalSetting.title')}</div>
  921. <div className='body-xs-regular text-text-tertiary'>
  922. <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
  923. {t('datasetSettings.form.retrievalSetting.longDescription')}
  924. </div>
  925. </div>
  926. )
  927. : (
  928. <div className={cn('system-md-semibold mb-0.5', 'flex justify-between items-center')}>
  929. <div>{t('datasetSettings.form.retrievalSetting.title')}</div>
  930. </div>
  931. )}
  932. <div className=''>
  933. {
  934. getIndexing_technique() === IndexingType.QUALIFIED
  935. ? (
  936. <RetrievalMethodConfig
  937. value={retrievalConfig}
  938. onChange={setRetrievalConfig}
  939. />
  940. )
  941. : (
  942. <EconomicalRetrievalMethodConfig
  943. value={retrievalConfig}
  944. onChange={setRetrievalConfig}
  945. />
  946. )
  947. }
  948. </div>
  949. </div>
  950. {!isSetting
  951. ? (
  952. <div className='flex items-center mt-8 py-2'>
  953. <Button onClick={() => onStepChange && onStepChange(-1)}>
  954. <RiArrowLeftLine className='w-4 h-4 mr-1' />
  955. {t('datasetCreation.stepTwo.previousStep')}
  956. </Button>
  957. <Button className='ml-auto' loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button>
  958. </div>
  959. )
  960. : (
  961. <div className='flex items-center mt-8 py-2'>
  962. <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
  963. <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
  964. </div>
  965. )}
  966. </div>
  967. <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={() => { }} footer={null}>
  968. <PreviewContainer
  969. header={<PreviewHeader
  970. title={t('datasetCreation.stepTwo.preview')}
  971. >
  972. <div className='flex items-center gap-1'>
  973. {dataSourceType === DataSourceType.FILE
  974. && <PreviewDocumentPicker
  975. files={files as Array<Required<CustomFile>>}
  976. onChange={(selected) => {
  977. currentEstimateMutation.reset()
  978. setPreviewFile(selected)
  979. currentEstimateMutation.mutate()
  980. }}
  981. // when it is from setting, it just has one file
  982. value={isSetting ? (files[0]! as Required<CustomFile>) : previewFile}
  983. />
  984. }
  985. {dataSourceType === DataSourceType.NOTION
  986. && <PreviewDocumentPicker
  987. files={
  988. notionPages.map(page => ({
  989. id: page.page_id,
  990. name: page.page_name,
  991. extension: 'md',
  992. }))
  993. }
  994. onChange={(selected) => {
  995. currentEstimateMutation.reset()
  996. const selectedPage = notionPages.find(page => page.page_id === selected.id)
  997. setPreviewNotionPage(selectedPage!)
  998. currentEstimateMutation.mutate()
  999. }}
  1000. value={{
  1001. id: previewNotionPage?.page_id || '',
  1002. name: previewNotionPage?.page_name || '',
  1003. extension: 'md',
  1004. }}
  1005. />
  1006. }
  1007. {dataSourceType === DataSourceType.WEB
  1008. && <PreviewDocumentPicker
  1009. files={
  1010. websitePages.map(page => ({
  1011. id: page.source_url,
  1012. name: page.title,
  1013. extension: 'md',
  1014. }))
  1015. }
  1016. onChange={(selected) => {
  1017. currentEstimateMutation.reset()
  1018. const selectedPage = websitePages.find(page => page.source_url === selected.id)
  1019. setPreviewWebsitePage(selectedPage!)
  1020. currentEstimateMutation.mutate()
  1021. }}
  1022. value={
  1023. {
  1024. id: previewWebsitePage?.source_url || '',
  1025. name: previewWebsitePage?.title || '',
  1026. extension: 'md',
  1027. }
  1028. }
  1029. />
  1030. }
  1031. {
  1032. currentDocForm !== ChunkingMode.qa
  1033. && <Badge text={t(
  1034. 'datasetCreation.stepTwo.previewChunkCount', {
  1035. count: estimate?.total_segments || 0,
  1036. }) as string}
  1037. />
  1038. }
  1039. </div>
  1040. </PreviewHeader>}
  1041. className={cn('flex shrink-0 w-1/2 p-4 pr-0 relative h-full', isMobile && 'w-full max-w-[524px]')}
  1042. mainClassName='space-y-6'
  1043. >
  1044. {currentDocForm === ChunkingMode.qa && estimate?.qa_preview && (
  1045. estimate?.qa_preview.map((item, index) => (
  1046. <ChunkContainer
  1047. key={item.question}
  1048. label={`Chunk-${index + 1}`}
  1049. characterCount={item.question.length + item.answer.length}
  1050. >
  1051. <QAPreview qa={item} />
  1052. </ChunkContainer>
  1053. ))
  1054. )}
  1055. {currentDocForm === ChunkingMode.text && estimate?.preview && (
  1056. estimate?.preview.map((item, index) => (
  1057. <ChunkContainer
  1058. key={item.content}
  1059. label={`Chunk-${index + 1}`}
  1060. characterCount={item.content.length}
  1061. >
  1062. {item.content}
  1063. </ChunkContainer>
  1064. ))
  1065. )}
  1066. {currentDocForm === ChunkingMode.parentChild && currentEstimateMutation.data?.preview && (
  1067. estimate?.preview?.map((item, index) => {
  1068. const indexForLabel = index + 1
  1069. return (
  1070. <ChunkContainer
  1071. key={item.content}
  1072. label={`Chunk-${indexForLabel}`}
  1073. characterCount={item.content.length}
  1074. >
  1075. <FormattedText>
  1076. {item.child_chunks.map((child, index) => {
  1077. const indexForLabel = index + 1
  1078. return (
  1079. <PreviewSlice
  1080. key={child}
  1081. label={`C-${indexForLabel}`}
  1082. text={child}
  1083. tooltip={`Child-chunk-${indexForLabel} · ${child.length} Characters`}
  1084. labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
  1085. dividerClassName='leading-7'
  1086. />
  1087. )
  1088. })}
  1089. </FormattedText>
  1090. </ChunkContainer>
  1091. )
  1092. })
  1093. )}
  1094. {currentEstimateMutation.isIdle && (
  1095. <div className='h-full w-full flex items-center justify-center'>
  1096. <div className='flex flex-col items-center justify-center gap-3'>
  1097. <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
  1098. <p className='text-sm text-text-tertiary'>
  1099. {t('datasetCreation.stepTwo.previewChunkTip')}
  1100. </p>
  1101. </div>
  1102. </div>
  1103. )}
  1104. {currentEstimateMutation.isPending && (
  1105. <div className='space-y-6'>
  1106. {Array.from({ length: 10 }, (_, i) => (
  1107. <SkeletonContainer key={i}>
  1108. <SkeletonRow>
  1109. <SkeletonRectangle className="w-20" />
  1110. <SkeletonPoint />
  1111. <SkeletonRectangle className="w-24" />
  1112. </SkeletonRow>
  1113. <SkeletonRectangle className="w-full" />
  1114. <SkeletonRectangle className="w-full" />
  1115. <SkeletonRectangle className="w-[422px]" />
  1116. </SkeletonContainer>
  1117. ))}
  1118. </div>
  1119. )}
  1120. </PreviewContainer>
  1121. </FloatRightContainer>
  1122. </div>
  1123. )
  1124. }
  // Expose StepTwo as this module's default export.
  // NOTE(review): StepTwo is presumably the component whose JSX ends just above; its declaration is outside this excerpt — confirm.
  1125. export default StepTwo