瀏覽代碼

fix: fix document list overlap and optimize document list fetching (#15377)

Wu Tianwei 1 月之前
父節點
當前提交
2cf0cb471f

+ 10 - 5
web/app/components/datasets/documents/detail/index.tsx

@@ -23,7 +23,7 @@ import FloatRightContainer from '@/app/components/base/float-right-container'
 import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
 import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
 import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
-import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'
+import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '@/service/knowledge/use-document'
 import { useInvalid } from '@/service/use-base'
 
 type DocumentContextValue = {
@@ -152,17 +152,22 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
 
   const invalidChunkList = useInvalid(useSegmentListKey)
   const invalidChildChunkList = useInvalid(useChildSegmentListKey)
+  const invalidDocumentList = useInvalidDocumentList(datasetId)
 
   const handleOperate = (operateName?: string) => {
+    invalidDocumentList()
     if (operateName === 'delete') {
       backToPrev()
     }
     else {
       detailMutate()
-      setTimeout(() => {
-        invalidChunkList()
-        invalidChildChunkList()
-      }, 5000)
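+      // A rename is expected to arrive without an operateName (TODO confirm against the rename caller); every other operation refreshes the chunk and child-chunk lists after 5 seconds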
+      if (operateName) {
+        setTimeout(() => {
+          invalidChunkList()
+          invalidChildChunkList()
+        }, 5000)
+      }
     }
   }
 

+ 27 - 30
web/app/components/datasets/documents/index.tsx

@@ -1,11 +1,10 @@
 'use client'
 import type { FC } from 'react'
 import React, { useCallback, useEffect, useMemo, useState } from 'react'
-import useSWR from 'swr'
 import { useTranslation } from 'react-i18next'
 import { useRouter } from 'next/navigation'
 import { useDebounce, useDebounceFn } from 'ahooks'
-import { groupBy, omit } from 'lodash-es'
+import { groupBy } from 'lodash-es'
 import { PlusIcon } from '@heroicons/react/24/solid'
 import { RiExternalLinkLine } from '@remixicon/react'
 import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document'
@@ -15,16 +14,16 @@ import Loading from '@/app/components/base/loading'
 import Button from '@/app/components/base/button'
 import Input from '@/app/components/base/input'
 import { get } from '@/service/base'
-import { createDocument, fetchDocuments } from '@/service/datasets'
+import { createDocument } from '@/service/datasets'
 import { useDatasetDetailContext } from '@/context/dataset-detail'
 import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector'
 import type { NotionPage } from '@/models/common'
 import type { CreateDocumentReq } from '@/models/datasets'
-import { DataSourceType } from '@/models/datasets'
+import { DataSourceType, ProcessMode } from '@/models/datasets'
 import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
 import { useProviderContext } from '@/context/provider-context'
 import cn from '@/utils/classnames'
-import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
+import { useDocumentList, useInvalidDocumentDetailKey, useInvalidDocumentList } from '@/service/knowledge/use-document'
 import { useInvalid } from '@/service/use-base'
 import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'
 
@@ -73,12 +72,12 @@ const EmptyElement: FC<{ canAdd: boolean; onClick: () => void; type?: 'upload' |
   </div>
 }
 
-interface IDocumentsProps {
+type IDocumentsProps = {
   datasetId: string
 }
 
 export const fetcher = (url: string) => get(url, {}, {})
-const DEFAULT_LIMIT = 15
+const DEFAULT_LIMIT = 10
 
 const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
   const { t } = useTranslation()
@@ -99,33 +98,33 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
 
   const debouncedSearchValue = useDebounce(searchValue, { wait: 500 })
 
-  const query = useMemo(() => {
-    return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' }
-  }, [currPage, debouncedSearchValue, isDataSourceNotion, limit])
-
-  const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR(
-    {
-      action: 'fetchDocuments',
-      datasetId,
-      params: query,
+  const { data: documentsRes, isFetching: isListLoading } = useDocumentList({
+    datasetId,
+    query: {
+      page: currPage + 1,
+      limit,
+      keyword: debouncedSearchValue,
     },
-    apiParams => fetchDocuments(omit(apiParams, 'action')),
-    { refreshInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0 },
-  )
+    refetchInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0,
+  })
+
+  const invalidDocumentList = useInvalidDocumentList(datasetId)
 
-  const [isMuting, setIsMuting] = useState(false)
   useEffect(() => {
-    if (!isListLoading && isMuting)
-      setIsMuting(false)
-  }, [isListLoading, isMuting])
+    if (documentsRes) {
+      const totalPages = Math.ceil(documentsRes.total / limit)
+      if (totalPages < currPage + 1)
+        setCurrPage(totalPages === 0 ? 0 : totalPages - 1)
+    }
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [documentsRes])
 
   const invalidDocumentDetail = useInvalidDocumentDetailKey()
   const invalidChunkList = useInvalid(useSegmentListKey)
   const invalidChildChunkList = useInvalid(useChildSegmentListKey)
 
   const handleUpdate = useCallback(() => {
-    setIsMuting(true)
-    mutate()
+    invalidDocumentList()
     invalidDocumentDetail()
     setTimeout(() => {
       invalidChunkList()
@@ -175,8 +174,6 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
     router.push(`/datasets/${datasetId}/documents/create`)
   }
 
-  const isLoading = isListLoading // !documentsRes && !error
-
   const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => {
     const workspacesMap = groupBy(selectedPages, 'workspace_id')
     const workspaces = Object.keys(workspacesMap).map((workspaceId) => {
@@ -209,7 +206,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
       indexing_technique: dataset?.indexing_technique,
       process_rule: {
         rules: {},
-        mode: 'automatic',
+        mode: ProcessMode.general,
       },
     } as CreateDocumentReq
 
@@ -217,7 +214,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
       datasetId,
       body: params,
     })
-    mutate()
+    invalidDocumentList()
     setTimerCanRun(true)
     // mutateDatasetIndexingStatus(undefined, { revalidate: true })
     setNotionPageSelectorModalVisible(false)
@@ -272,7 +269,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
             )}
           </div>
         </div>
-        {(isLoading && !isMuting)
+        {isListLoading
           ? <Loading type='app' />
           : total > 0
             ? <List

+ 108 - 106
web/app/components/datasets/documents/list.tsx

@@ -500,121 +500,123 @@ const DocumentList: FC<IDocumentListProps> = ({
   }
 
   return (
-    <div className='relative w-full h-full overflow-x-auto'>
-      <table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
-        <thead className="h-8 leading-8 border-b border-divider-subtle text-text-tertiary font-medium text-xs uppercase">
-          <tr>
-            <td className='w-12'>
-              <div className='flex items-center' onClick={e => e.stopPropagation()}>
-                <Checkbox
-                  className='shrink-0 mr-2'
-                  checked={isAllSelected}
-                  mixed={!isAllSelected && isSomeSelected}
-                  onCheck={onSelectedAll}
-                />
-                #
-              </div>
-            </td>
-            <td>
-              <div className='flex'>
-                {t('datasetDocuments.list.table.header.fileName')}
-              </div>
-            </td>
-            <td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
-            <td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
-            <td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
-            <td className='w-44'>
-              <div className='flex items-center' onClick={onClickSort}>
-                {t('datasetDocuments.list.table.header.uploadTime')}
-                <ArrowDownIcon className={cn('ml-0.5 h-3 w-3 stroke-current stroke-2 cursor-pointer', enableSort ? 'text-text-tertiary' : 'text-text-disabled')} />
-              </div>
-            </td>
-            <td className='w-40'>{t('datasetDocuments.list.table.header.status')}</td>
-            <td className='w-20'>{t('datasetDocuments.list.table.header.action')}</td>
-          </tr>
-        </thead>
-        <tbody className="text-text-secondary">
-          {localDocs.map((doc, index) => {
-            const isFile = doc.data_source_type === DataSourceType.FILE
-            const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
-            return <tr
-              key={doc.id}
-              className={'border-b border-divider-subtle h-8 hover:bg-background-default-hover cursor-pointer'}
-              onClick={() => {
-                router.push(`/datasets/${datasetId}/documents/${doc.id}`)
-              }}>
-              <td className='text-left align-middle text-text-tertiary text-xs'>
+    <div className='flex flex-col relative w-full h-full'>
+      <div className='grow overflow-x-auto'>
+        <table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
+          <thead className="h-8 leading-8 border-b border-divider-subtle text-text-tertiary font-medium text-xs uppercase">
+            <tr>
+              <td className='w-12'>
                 <div className='flex items-center' onClick={e => e.stopPropagation()}>
                   <Checkbox
                     className='shrink-0 mr-2'
-                    checked={selectedIds.includes(doc.id)}
-                    onCheck={() => {
-                      onSelectedIdChange(
-                        selectedIds.includes(doc.id)
-                          ? selectedIds.filter(id => id !== doc.id)
-                          : [...selectedIds, doc.id],
-                      )
-                    }}
+                    checked={isAllSelected}
+                    mixed={!isAllSelected && isSomeSelected}
+                    onCheck={onSelectedAll}
                   />
-                  {/* {doc.position} */}
-                  {index + 1}
+                  #
                 </div>
               </td>
               <td>
-                <div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
-                  <div className='shrink-0'>
-                    {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
-                    {doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
-                    {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />}
-                  </div>
-                  <span className='text-sm truncate grow-1'>{doc.name}</span>
-                  <div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
-                    <Tooltip
-                      popupContent={t('datasetDocuments.list.table.rename')}
-                    >
-                      <div
-                        className='p-1 rounded-md cursor-pointer hover:bg-state-base-hover'
-                        onClick={(e) => {
-                          e.stopPropagation()
-                          handleShowRenameModal(doc)
-                        }}
-                      >
-                        <Edit03 className='w-4 h-4 text-text-tertiary' />
-                      </div>
-                    </Tooltip>
-                  </div>
+                <div className='flex'>
+                  {t('datasetDocuments.list.table.header.fileName')}
                 </div>
               </td>
-              <td>
-                <ChunkingModeLabel
-                  isGeneralMode={isGeneralMode}
-                  isQAMode={isQAMode}
-                />
-              </td>
-              <td>{renderCount(doc.word_count)}</td>
-              <td>{renderCount(doc.hit_count)}</td>
-              <td className='text-text-secondary text-[13px]'>
-                {formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
-              </td>
-              <td>
-                {
-                  (['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION)
-                    ? <ProgressBar percent={doc.percent || 0} />
-                    : <StatusItem status={doc.display_status} />
-                }
-              </td>
-              <td>
-                <OperationAction
-                  embeddingAvailable={embeddingAvailable}
-                  datasetId={datasetId}
-                  detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
-                  onUpdate={onUpdate}
-                />
+              <td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
+              <td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
+              <td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
+              <td className='w-44'>
+                <div className='flex items-center' onClick={onClickSort}>
+                  {t('datasetDocuments.list.table.header.uploadTime')}
+                  <ArrowDownIcon className={cn('ml-0.5 h-3 w-3 stroke-current stroke-2 cursor-pointer', enableSort ? 'text-text-tertiary' : 'text-text-disabled')} />
+                </div>
               </td>
+              <td className='w-40'>{t('datasetDocuments.list.table.header.status')}</td>
+              <td className='w-20'>{t('datasetDocuments.list.table.header.action')}</td>
             </tr>
-          })}
-        </tbody>
-      </table>
+          </thead>
+          <tbody className="text-text-secondary">
+            {localDocs.map((doc, index) => {
+              const isFile = doc.data_source_type === DataSourceType.FILE
+              const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
+              return <tr
+                key={doc.id}
+                className={'border-b border-divider-subtle h-8 hover:bg-background-default-hover cursor-pointer'}
+                onClick={() => {
+                  router.push(`/datasets/${datasetId}/documents/${doc.id}`)
+                }}>
+                <td className='text-left align-middle text-text-tertiary text-xs'>
+                  <div className='flex items-center' onClick={e => e.stopPropagation()}>
+                    <Checkbox
+                      className='shrink-0 mr-2'
+                      checked={selectedIds.includes(doc.id)}
+                      onCheck={() => {
+                        onSelectedIdChange(
+                          selectedIds.includes(doc.id)
+                            ? selectedIds.filter(id => id !== doc.id)
+                            : [...selectedIds, doc.id],
+                        )
+                      }}
+                    />
+                    {/* {doc.position} */}
+                    {index + 1}
+                  </div>
+                </td>
+                <td>
+                  <div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
+                    <div className='shrink-0'>
+                      {doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex mt-[-3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
+                      {doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
+                      {doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex mt-[-3px] mr-1.5 align-middle' />}
+                    </div>
+                    <span className='text-sm truncate grow-1'>{doc.name}</span>
+                    <div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
+                      <Tooltip
+                        popupContent={t('datasetDocuments.list.table.rename')}
+                      >
+                        <div
+                          className='p-1 rounded-md cursor-pointer hover:bg-state-base-hover'
+                          onClick={(e) => {
+                            e.stopPropagation()
+                            handleShowRenameModal(doc)
+                          }}
+                        >
+                          <Edit03 className='w-4 h-4 text-text-tertiary' />
+                        </div>
+                      </Tooltip>
+                    </div>
+                  </div>
+                </td>
+                <td>
+                  <ChunkingModeLabel
+                    isGeneralMode={isGeneralMode}
+                    isQAMode={isQAMode}
+                  />
+                </td>
+                <td>{renderCount(doc.word_count)}</td>
+                <td>{renderCount(doc.hit_count)}</td>
+                <td className='text-text-secondary text-[13px]'>
+                  {formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
+                </td>
+                <td>
+                  {
+                    (['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION)
+                      ? <ProgressBar percent={doc.percent || 0} />
+                      : <StatusItem status={doc.display_status} />
+                  }
+                </td>
+                <td>
+                  <OperationAction
+                    embeddingAvailable={embeddingAvailable}
+                    datasetId={datasetId}
+                    detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
+                    onUpdate={onUpdate}
+                  />
+                </td>
+              </tr>
+            })}
+          </tbody>
+        </table>
+      </div>
       {(selectedIds.length > 0) && (
         <BatchAction
           className='absolute left-0 bottom-16 z-20'
@@ -629,10 +631,10 @@ const DocumentList: FC<IDocumentListProps> = ({
         />
       )}
       {/* Show Pagination whenever the total is non-zero, even if everything fits on one page */}
-      {pagination.total && pagination.total > (pagination.limit || 10) && (
+      {pagination.total && (
         <Pagination
           {...pagination}
-          className='absolute bottom-0 left-0 w-full px-0 pb-0'
+          className='shrink-0 w-full px-0 pb-0'
         />
       )}
 

+ 0 - 5
web/service/datasets.ts

@@ -5,7 +5,6 @@ import type {
   CreateDocumentReq,
   DataSet,
   DataSetListResponse,
-  DocumentListResponse,
   ErrorDocsResponse,
   ExternalAPIDeleteResponse,
   ExternalAPIItem,
@@ -122,10 +121,6 @@ export const fetchProcessRule: Fetcher<ProcessRuleResponse, { params: { document
   return get<ProcessRuleResponse>('/datasets/process-rule', { params: { document_id: documentId } })
 }
 
-export const fetchDocuments: Fetcher<DocumentListResponse, { datasetId: string; params: { keyword: string; page: number; limit: number; sort?: SortType } }> = ({ datasetId, params }) => {
-  return get<DocumentListResponse>(`/datasets/${datasetId}/documents`, { params })
-}
-
 export const createFirstDocument: Fetcher<createDocumentResponse, { body: CreateDocumentReq }> = ({ body }) => {
   return post<createDocumentResponse>('/datasets/init', { body })
 }

+ 13 - 9
web/service/knowledge/use-document.ts

@@ -4,8 +4,8 @@ import {
 } from '@tanstack/react-query'
 import { del, get, patch } from '../base'
 import { useInvalid } from '../use-base'
-import type { MetadataType } from '../datasets'
-import type { DocumentDetailResponse, SimpleDocumentDetail, UpdateDocumentBatchParams } from '@/models/datasets'
+import type { MetadataType, SortType } from '../datasets'
+import type { DocumentDetailResponse, DocumentListResponse, UpdateDocumentBatchParams } from '@/models/datasets'
 import { DocumentActionType } from '@/models/datasets'
 import type { CommonResponse } from '@/models/common'
 
@@ -18,19 +18,23 @@ export const useDocumentList = (payload: {
     keyword: string
     page: number
     limit: number
-  }
+    sort?: SortType
+  },
+  refetchInterval?: number | false
 }) => {
-  const { query, datasetId } = payload
-  return useQuery<{ data: SimpleDocumentDetail[] }>({
-    queryKey: [...useDocumentListKey, datasetId, query],
-    queryFn: () => get<{ data: SimpleDocumentDetail[] }>(`/datasets/${datasetId}/documents`, {
+  const { query, datasetId, refetchInterval } = payload
+  const { keyword, page, limit, sort } = query
+  return useQuery<DocumentListResponse>({
+    queryKey: [...useDocumentListKey, datasetId, keyword, page, limit, sort],
+    queryFn: () => get<DocumentListResponse>(`/datasets/${datasetId}/documents`, {
       params: query,
     }),
+    refetchInterval,
   })
 }
 
-export const useInvalidDocumentList = () => {
-  return useInvalid(useDocumentListKey)
+export const useInvalidDocumentList = (datasetId?: string) => {
+  return useInvalid(datasetId ? [...useDocumentListKey, datasetId] : useDocumentListKey)
 }
 
 const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument']