index.tsx 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. 'use client'
  2. import React, { useState, useRef, useEffect, useLayoutEffect } from 'react'
  3. import { useTranslation } from 'react-i18next'
  4. import { useBoolean } from 'ahooks'
  5. import type { File, PreProcessingRule, Rules, FileIndexingEstimateResponse as IndexingEstimateResponse } from '@/models/datasets'
  6. import {
  7. fetchDefaultProcessRule,
  8. createFirstDocument,
  9. createDocument,
  10. fetchFileIndexingEstimate as didFetchFileIndexingEstimate,
  11. } from '@/service/datasets'
  12. import type { CreateDocumentReq, createDocumentResponse, FullDocumentDetail } from '@/models/datasets'
  13. import Button from '@/app/components/base/button'
  14. import PreviewItem from './preview-item'
  15. import Loading from '@/app/components/base/loading'
  16. import { XMarkIcon } from '@heroicons/react/20/solid'
  17. import cn from 'classnames'
  18. import s from './index.module.css'
  19. import Link from 'next/link'
  20. import Toast from '@/app/components/base/toast'
  21. import { formatNumber } from '@/utils/format'
  /** Props accepted by the `StepTwo` component. */
  type StepTwoProps = {
    /** When truthy, the step re-processes `documentDetail` (save/cancel buttons) instead of creating a document. */
    isSetting?: boolean
    /** Existing document; supplies the initial rules, the segmentation mode and `original_document_id`. */
    documentDetail?: FullDocumentDetail
    /** Whether an API key is configured; the high-quality index option is only selectable when true. */
    hasSetAPIKEY: boolean
    /** Click handler of the link shown in the "no API key" warning. */
    onSetting: () => void
    /** Target dataset; when absent the document is created through `createFirstDocument`. */
    datasetId?: string
    /** Index mode already fixed for the dataset; when set it takes precedence over the local choice. */
    indexingType?: string
    /** Uploaded file whose `id` / `name` are sent with the estimate and creation requests. */
    file?: File
    /** Wizard navigation: called with `+1` after a successful create and `-1` from the "last step" button. */
    onStepChange?: (delta: number) => void
    /** Receives the selected index type after a document has been created. */
    updateIndexingTypeCache?: (type: string) => void
    /** Receives the creation result after a document has been created. */
    updateResultCache?: (res: createDocumentResponse) => void
    /** Called after a successful create when `isSetting` is truthy. */
    onSave?: () => void
    /** Click handler of the cancel button shown when `isSetting` is truthy. */
    onCancel?: () => void
  }
  /** Segmentation modes; the value is sent as `mode` of the process rule. */
  enum SegmentType {
    /** No custom rules are attached to the process rule. */
    AUTO = 'automatic',
    /** The separator, max length and pre-processing rules chosen in the form are attached. */
    CUSTOM = 'custom',
  }
  /** Index modes; the value is sent as `indexing_technique`. */
  enum IndexingType {
    /** Only selectable when an API key has been configured. */
    QUALIFIED = 'high_quality',
    /** Displayed with a fixed cost of "0 tokens". */
    ECONOMICAL = 'economy',
  }
  44. const StepTwo = ({
  45. isSetting,
  46. documentDetail,
  47. hasSetAPIKEY,
  48. onSetting,
  49. datasetId,
  50. indexingType,
  51. file,
  52. onStepChange,
  53. updateIndexingTypeCache,
  54. updateResultCache,
  55. onSave,
  56. onCancel,
  57. }: StepTwoProps) => {
  const { t } = useTranslation()

  // Scroll containers; the flags record whether each container is scrolled away from the top.
  const scrollRef = useRef<HTMLDivElement>(null)
  const [scrolled, setScrolled] = useState(false)
  const previewScrollRef = useRef<HTMLDivElement>(null)
  const [previewScrolled, setPreviewScrolled] = useState(false)

  // Segmentation form state. The separator is kept in its escaped, editable form ("\\n").
  const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.AUTO)
  const [segmentIdentifier, setSegmentIdentifier] = useState('\\n')
  const [max, setMax] = useState(1000)
  const [rules, setRules] = useState<PreProcessingRule[]>([])
  // Baseline that the "reset" button restores.
  const [defaultConfig, setDefaultConfig] = useState<Rules>()

  // Index mode. A mode passed in through props wins; otherwise it depends on the API key.
  const hasSetIndexType = !!indexingType
  const [indexType, setIndexType] = useState<IndexingType>(
    // Parenthesised on purpose: `a || b ? x : y` parses as `(a || b) ? x : y`,
    // which ignored the value of `indexingType` and always started as QUALIFIED.
    (indexingType as IndexingType | undefined)
      || (hasSetAPIKEY ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL),
  )

  const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()

  // One estimate per segmentation mode; `null` means "not fetched yet".
  const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
  const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
  // Estimate that belongs to the segmentation mode currently selected.
  const fileIndexingEstimate = segmentationType === SegmentType.AUTO
    ? automaticFileIndexingEstimate
    : customFileIndexingEstimate
  /** Scroll listener of the main column: `scrolled` mirrors "scrollTop is greater than zero". */
  const scrollHandle = (e: any) => {
    const isAwayFromTop = e.target.scrollTop > 0
    setScrolled(isAwayFromTop)
  }
  /** Scroll listener of the preview panel: `previewScrolled` mirrors "scrollTop is greater than zero". */
  const previewScrollHandle = (e: any) => {
    const isAwayFromTop = e.target.scrollTop > 0
    setPreviewScrolled(isAwayFromTop)
  }
  /**
   * Returns `name` without its last extension.
   *
   * Names with no extension to strip ("README", ".env", "") are returned
   * unchanged; previously they collapsed to an empty string.
   *
   * @param name File name, e.g. "report.final.pdf".
   * @returns The name up to, but excluding, the last dot.
   */
  const getFileName = (name: string) => {
    const lastDot = name.lastIndexOf('.')
    // -1: no dot at all; 0: dot-file such as ".env" — neither has an extension to cut
    return lastDot > 0 ? name.slice(0, lastDot) : name
  }
  /**
   * Translated label of a pre-processing rule.
   *
   * @param key Rule id.
   * @returns The translated label, or `undefined` for an id that is not listed here.
   */
  const getRuleName = (key: string) => {
    switch (key) {
      case 'remove_extra_spaces':
        return t('datasetCreation.stepTwo.removeExtraSpaces')
      case 'remove_urls_emails':
        return t('datasetCreation.stepTwo.removeUrlEmails')
      case 'remove_stopwords':
        return t('datasetCreation.stepTwo.removeStopwords')
      default:
        return undefined
    }
  }
  /**
   * Flips the `enabled` flag of the rule with the given id.
   * The toggled entry is rebuilt from `id` and `enabled` only; all other entries are kept as they are.
   */
  const ruleChangeHandle = (id: string) => {
    setRules(rules.map(item => (
      item.id === id
        ? { id: item.id, enabled: !item.enabled }
        : item
    )))
  }
  /** Restores separator, max length and rules from `defaultConfig`; does nothing while it is unset. */
  const resetRules = () => {
    if (!defaultConfig)
      return
    const { separator, max_tokens } = defaultConfig.segmentation
    // a real line break (or an empty separator) is shown in its escaped form
    setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
    setMax(max_tokens)
    setRules(defaultConfig.pre_processing_rules)
  }
  /** "Preview" button handler: clears the custom estimate, opens the preview panel and requests a new estimate. */
  const confirmChangeCustomConfig = async () => {
    // clear first, so the panel shows its loading placeholder until the new estimate arrives
    setCustomFileIndexingEstimate(null)
    setShowPreview()
    await fetchFileIndexingEstimate()
  }
  /** Index mode to send: the `indexingType` prop when it is truthy, otherwise the locally selected one. */
  const getIndexing_technique = () => indexingType || indexType
  /**
   * Builds the `process_rule` payload for the current form state.
   *
   * @returns `{ rules, mode }`; `rules` is filled only for custom segmentation and is `{}` otherwise.
   */
  const getProcessRule = () => {
    const isCustom = segmentationType === SegmentType.CUSTOM
    const processRule: any = {
      // api will check this. It will be removed after api refactored.
      rules: isCustom
        ? {
          pre_processing_rules: rules,
          segmentation: {
            // the input holds the escaped form; send a real line break
            separator: segmentIdentifier === '\\n' ? '\n' : segmentIdentifier,
            max_tokens: max,
          },
        }
        : {},
      mode: segmentationType,
    }
    return processRule
  }
  /** Request body for the indexing-estimate call, assembled from props and the current form state. */
  const getFileIndexingEstimateParams = () => ({
    file_id: file?.id,
    dataset_id: datasetId,
    indexing_technique: getIndexing_technique(),
    process_rule: getProcessRule(),
  })
  /**
   * Requests an indexing estimate and stores it in the slot of the segmentation
   * mode that was selected when the call started.
   */
  const fetchFileIndexingEstimate = async () => {
    const estimate = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams())
    const storeEstimate = segmentationType === SegmentType.CUSTOM
      ? setCustomFileIndexingEstimate
      : setAutomaticFileIndexingEstimate
    storeEstimate(estimate)
  }
  /**
   * Request body for creating or re-processing a document.
   *
   * @returns In setting mode only the original document id and the process rule;
   *          otherwise the uploaded-file data source, the index mode and the process rule.
   */
  const getCreationParams = () => {
    if (isSetting) {
      return {
        original_document_id: documentDetail?.id,
        process_rule: getProcessRule(),
      } as CreateDocumentReq
    }
    return {
      data_source: {
        type: 'upload_file',
        info: file?.id,
        name: file?.name,
      },
      indexing_technique: getIndexing_technique(),
      process_rule: getProcessRule(),
    } as CreateDocumentReq
  }
  /**
   * Loads the default process rule and uses it both as the current form state
   * and as the baseline for "reset". Failures are only logged.
   */
  const getRules = async () => {
    try {
      const { rules: fetched } = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
      const { separator, max_tokens } = fetched.segmentation
      // a real line break (or an empty separator) is shown in its escaped form
      setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
      setMax(max_tokens)
      setRules(fetched.pre_processing_rules)
      setDefaultConfig(fetched)
    }
    catch (err) {
      console.log(err)
    }
  }
  /** Seeds the form state and the "reset" baseline from the process rule of `documentDetail`, when one is given. */
  const getRulesFromDetail = () => {
    if (!documentDetail)
      return
    const savedRules = documentDetail.dataset_process_rule.rules
    const { separator, max_tokens } = savedRules.segmentation
    // a real line break (or an empty separator) is shown in its escaped form
    setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
    setMax(max_tokens)
    setRules(savedRules.pre_processing_rules)
    setDefaultConfig(savedRules)
  }
  /** Selects the segmentation mode stored on `documentDetail`, when one is given. */
  const getDefaultMode = () => {
    if (!documentDetail)
      return
    setSegmentationType(documentDetail.dataset_process_rule.mode)
  }
  /**
   * Submits the form. Creates the first document of a new dataset when no
   * `datasetId` is given, otherwise a document inside that dataset, then
   * reports the result through the optional callbacks. Any error is shown as an error toast.
   */
  const createHandle = async () => {
    try {
      const params = getCreationParams()
      if (datasetId) {
        const created = await createDocument({
          datasetId,
          body: params,
        })
        updateIndexingTypeCache?.(indexType)
        // this response is cached under a `document` key
        updateResultCache?.({
          document: created,
        })
      }
      else {
        const created = await createFirstDocument({
          body: params,
        })
        updateIndexingTypeCache?.(indexType)
        updateResultCache?.(created)
      }
      onStepChange?.(+1)
      if (isSetting)
        onSave?.()
    }
    catch (err) {
      Toast.notify({
        type: 'error',
        message: err + '',
      })
    }
  }
  // Runs once on mount: setting mode seeds rules and segmentation mode from the
  // existing document, creation mode fetches the default rules.
  useEffect(() => {
    if (isSetting) {
      getRulesFromDetail()
      getDefaultMode()
      return
    }
    getRules()
  }, [])
  // Attaches the scroll listener of the main column once on mount.
  useEffect(() => {
    // Keep the node in a local: `scrollRef.current` may already have been reset
    // when the cleanup runs, which would leave the listener attached.
    const node = scrollRef.current
    node?.addEventListener('scroll', scrollHandle)
    return () => {
      node?.removeEventListener('scroll', scrollHandle)
    }
  }, [])
  // Attaches the scroll listener of the preview panel whenever the panel is shown.
  useLayoutEffect(() => {
    if (!showPreview)
      return undefined
    // Keep the node in a local: `previewScrollRef.current` may already have been
    // reset when the cleanup runs, which would leave the listener attached.
    const node = previewScrollRef.current
    node?.addEventListener('scroll', previewScrollHandle)
    return () => {
      node?.removeEventListener('scroll', previewScrollHandle)
    }
  }, [showPreview])
  // Keeps the selected index mode in sync with the props: a mode passed in
  // through `indexingType` wins, otherwise the choice depends on the API key.
  useEffect(() => {
    const fallback = hasSetAPIKEY ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL
    setIndexType(indexingType ? (indexingType as IndexingType) : fallback)
  }, [hasSetAPIKEY, indexingType, datasetId])
  // Reacts to a change of segmentation or index mode: automatic mode refreshes
  // its estimate and opens the preview; custom mode closes the preview and
  // clears its estimate until the user asks for a new one.
  useEffect(() => {
    if (segmentationType !== SegmentType.AUTO) {
      hidePreview()
      setCustomFileIndexingEstimate(null)
      return
    }
    setAutomaticFileIndexingEstimate(null)
    setShowPreview()
    fetchFileIndexingEstimate()
  }, [segmentationType, indexType])
  // Layout: the left column holds the form (segmentation, index mode, file
  // summary, actions); the right column shows either the segment preview or a
  // static tip card.
  return (
    <div className='flex w-full h-full'>
      <div ref={scrollRef} className='relative h-full w-full overflow-y-scroll'>
        <div className={cn(s.pageHeader, scrolled && s.fixed)}>{t('datasetCreation.steps.two')}</div>
        <div className={cn(s.form)}>
          <div className={s.label}>{t('datasetCreation.stepTwo.segmentation')}</div>
          <div className='max-w-[640px]'>
            {/* segmentation: automatic */}
            <div
              className={cn(
                s.radioItem,
                s.segmentationItem,
                segmentationType === SegmentType.AUTO && s.active,
              )}
              onClick={() => setSegmentationType(SegmentType.AUTO)}
            >
              <span className={cn(s.typeIcon, s.auto)} />
              <span className={cn(s.radio)} />
              <div className={s.typeHeader}>
                <div className={s.title}>{t('datasetCreation.stepTwo.auto')}</div>
                <div className={s.tip}>{t('datasetCreation.stepTwo.autoDescription')}</div>
              </div>
            </div>
            {/* segmentation: custom, with its form shown while selected */}
            <div
              className={cn(
                s.radioItem,
                s.segmentationItem,
                segmentationType === SegmentType.CUSTOM && s.active,
                segmentationType === SegmentType.CUSTOM && s.custom,
              )}
              onClick={() => setSegmentationType(SegmentType.CUSTOM)}
            >
              <span className={cn(s.typeIcon, s.customize)} />
              <span className={cn(s.radio)} />
              <div className={s.typeHeader}>
                <div className={s.title}>{t('datasetCreation.stepTwo.custom')}</div>
                <div className={s.tip}>{t('datasetCreation.stepTwo.customDescription')}</div>
              </div>
              {segmentationType === SegmentType.CUSTOM && (
                <div className={s.typeFormBody}>
                  <div className={s.formRow}>
                    <div className='w-full'>
                      <div className={s.label}>{t('datasetCreation.stepTwo.separator')}</div>
                      <input
                        type="text"
                        className={s.input}
                        placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''}
                        value={segmentIdentifier}
                        onChange={e => setSegmentIdentifier(e.target.value)}
                      />
                    </div>
                  </div>
                  <div className={s.formRow}>
                    <div className='w-full'>
                      <div className={s.label}>{t('datasetCreation.stepTwo.maxLength')}</div>
                      {/* NOTE(review): this reuses the separator placeholder text; confirm whether a dedicated key exists */}
                      <input
                        type="number"
                        className={s.input}
                        placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''}
                        value={max}
                        onChange={e => setMax(Number(e.target.value))}
                      />
                    </div>
                  </div>
                  <div className={s.formRow}>
                    <div className='w-full'>
                      <div className={s.label}>{t('datasetCreation.stepTwo.rules')}</div>
                      {rules.map(rule => (
                        <div key={rule.id} className={s.ruleItem}>
                          {/* controlled, so that "reset" is reflected in the checkbox as well */}
                          <input id={rule.id} type="checkbox" checked={!!rule.enabled} onChange={() => ruleChangeHandle(rule.id)} className="w-4 h-4 rounded border-gray-300 text-blue-700 focus:ring-blue-700" />
                          <label htmlFor={rule.id} className="ml-2 text-sm font-normal cursor-pointer text-gray-800">{getRuleName(rule.id)}</label>
                        </div>
                      ))}
                    </div>
                  </div>
                  <div className={s.formFooter}>
                    <Button type="primary" className={cn(s.button, '!h-8 text-primary-600')} onClick={confirmChangeCustomConfig}>{t('datasetCreation.stepTwo.preview')}</Button>
                    <Button className={cn(s.button, 'ml-2 !h-8')} onClick={resetRules}>{t('datasetCreation.stepTwo.reset')}</Button>
                  </div>
                </div>
              )}
            </div>
          </div>
          <div className={s.label}>{t('datasetCreation.stepTwo.indexMode')}</div>
          <div className='max-w-[640px]'>
            <div className='flex items-center gap-3'>
              {/* high quality: hidden when the dataset is fixed to the other mode */}
              {(!hasSetIndexType || indexingType === IndexingType.QUALIFIED) && (
                <div
                  className={cn(
                    s.radioItem,
                    s.indexItem,
                    !hasSetAPIKEY && s.disabled,
                    !hasSetIndexType && indexType === IndexingType.QUALIFIED && s.active,
                    hasSetIndexType && s.disabled,
                    hasSetIndexType && '!w-full',
                  )}
                  onClick={() => {
                    if (hasSetAPIKEY)
                      setIndexType(IndexingType.QUALIFIED)
                  }}
                >
                  <span className={cn(s.typeIcon, s.qualified)} />
                  {!hasSetIndexType && <span className={cn(s.radio)} />}
                  <div className={s.typeHeader}>
                    <div className={s.title}>
                      {t('datasetCreation.stepTwo.qualified')}
                      {!hasSetIndexType && <span className={s.recommendTag}>{t('datasetCreation.stepTwo.recommend')}</span>}
                    </div>
                    <div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div>
                    <div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
                    {
                      fileIndexingEstimate
                        ? (
                          <div className='text-xs font-medium text-gray-800'>{formatNumber(fileIndexingEstimate.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(fileIndexingEstimate.total_price)}</span>)</div>
                        )
                        : (
                          <div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>
                        )
                    }
                  </div>
                  {!hasSetAPIKEY && (
                    <div className={s.warningTip}>
                      <span>{t('datasetCreation.stepTwo.warning')}&nbsp;</span>
                      <span className={s.click} onClick={onSetting}>{t('datasetCreation.stepTwo.click')}</span>
                    </div>
                  )}
                </div>
              )}
              {/* economical: hidden when the dataset is fixed to the other mode */}
              {(!hasSetIndexType || indexingType === IndexingType.ECONOMICAL) && (
                <div
                  className={cn(
                    s.radioItem,
                    s.indexItem,
                    !hasSetIndexType && indexType === IndexingType.ECONOMICAL && s.active,
                    hasSetIndexType && s.disabled,
                    hasSetIndexType && '!w-full',
                  )}
                  onClick={() => !hasSetIndexType && setIndexType(IndexingType.ECONOMICAL)}
                >
                  <span className={cn(s.typeIcon, s.economical)} />
                  {!hasSetIndexType && <span className={cn(s.radio)} />}
                  <div className={s.typeHeader}>
                    <div className={s.title}>{t('datasetCreation.stepTwo.economical')}</div>
                    <div className={s.tip}>{t('datasetCreation.stepTwo.economicalTip')}</div>
                    <div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
                    <div className='text-xs font-medium text-gray-800'>0 tokens</div>
                  </div>
                </div>
              )}
            </div>
            {hasSetIndexType && (
              <div className='mt-2 text-xs text-gray-500 font-medium'>
                {t('datasetCreation.stepTwo.indexSettedTip')}
                <Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
              </div>
            )}
            {/* file summary: name without extension and estimated segment count */}
            <div className={s.file}>
              <div className={s.fileContent}>
                <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.fileName')}</div>
                <div className='flex items-center text-sm leading-6 font-medium text-gray-800'>
                  <span className={cn(s.fileIcon, file && s[file.extension])} />
                  {getFileName(file?.name || '')}
                </div>
              </div>
              <div className={s.divider} />
              <div className={s.fileContent}>
                <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateSegment')}</div>
                <div className='flex items-center text-sm leading-6 font-medium text-gray-800'>
                  {
                    fileIndexingEstimate
                      ? (
                        <div className='text-xs font-medium text-gray-800'>{formatNumber(fileIndexingEstimate.total_segments)} </div>
                      )
                      : (
                        <div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>
                      )
                  }
                </div>
              </div>
            </div>
            {!isSetting
              ? (
                <div className='flex items-center mt-8 py-2'>
                  <Button onClick={() => onStepChange && onStepChange(-1)}>{t('datasetCreation.stepTwo.lastStep')}</Button>
                  <div className={s.divider} />
                  <Button type='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button>
                </div>
              )
              : (
                <div className='flex items-center mt-8 py-2'>
                  <Button type='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
                  <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
                </div>
              )}
          </div>
        </div>
      </div>
      {showPreview
        ? (
          // 'relative h-full': the two classes were previously fused into one unknown class name
          <div ref={previewScrollRef} className={cn(s.previewWrap, 'relative h-full overflow-y-scroll border-l border-[#F2F4F7]')}>
            <div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`, ' flex items-center justify-between px-8')}>
              <span>{t('datasetCreation.stepTwo.previewTitle')}</span>
              <div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
                <XMarkIcon className='h-4 w-4'></XMarkIcon>
              </div>
            </div>
            <div className='my-4 px-8 space-y-4'>
              {fileIndexingEstimate?.preview
                ? (
                  <>
                    {fileIndexingEstimate.preview.map((item, index) => (
                      // the position is part of the key: two segments may have identical text
                      <PreviewItem key={`${index}-${item}`} content={item} index={index + 1} />
                    ))}
                  </>
                )
                : <div className='flex items-center justify-center h-[200px]'><Loading type='area'></Loading></div>
              }
            </div>
          </div>
        )
        : (
          <div className={cn(s.sideTip)}>
            <div className={s.tipCard}>
              <span className={s.icon} />
              <div className={s.title}>{t('datasetCreation.stepTwo.sideTipTitle')}</div>
              <div className={s.content}>
                <p className='mb-3'>{t('datasetCreation.stepTwo.sideTipP1')}</p>
                <p className='mb-3'>{t('datasetCreation.stepTwo.sideTipP2')}</p>
                <p className='mb-3'>{t('datasetCreation.stepTwo.sideTipP3')}</p>
                <p>{t('datasetCreation.stepTwo.sideTipP4')}</p>
              </div>
            </div>
          </div>
        )}
    </div>
  )
  511. }
  512. export default StepTwo