Browse source

Feat/customizable file upload config (#818)

Matri 1 year ago
Parent
Commit
155a4733f6

+ 7 - 1
api/config.py

@@ -62,7 +62,9 @@ DEFAULTS = {
     'HOSTED_ANTHROPIC_PAID_ENABLED': 'False',
     'HOSTED_ANTHROPIC_PAID_INCREASE_QUOTA': 1,
     'TENANT_DOCUMENT_COUNT': 100,
-    'CLEAN_DAY_SETTING': 30
+    'CLEAN_DAY_SETTING': 30,
+    'UPLOAD_FILE_SIZE_LIMIT': 15,
+    'UPLOAD_FILE_BATCH_LIMIT': 5,
 }
 
 
@@ -244,6 +246,10 @@ class Config:
         self.TENANT_DOCUMENT_COUNT = get_env('TENANT_DOCUMENT_COUNT')
         self.CLEAN_DAY_SETTING = get_env('CLEAN_DAY_SETTING')
 
+        # uploading settings
+        self.UPLOAD_FILE_SIZE_LIMIT = int(get_env('UPLOAD_FILE_SIZE_LIMIT'))
+        self.UPLOAD_FILE_BATCH_LIMIT = int(get_env('UPLOAD_FILE_BATCH_LIMIT'))
+
 
 class CloudEditionConfig(Config):
 

+ 0 - 4
api/controllers/console/datasets/data_source.py

@@ -21,10 +21,6 @@ from tasks.document_indexing_sync_task import document_indexing_sync_task
 
 cache = TTLCache(maxsize=None, ttl=30)
 
-FILE_SIZE_LIMIT = 15 * 1024 * 1024  # 15MB
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm']
-PREVIEW_WORDS_LIMIT = 3000
-
 
 class DataSourceApi(Resource):
     integrate_icon_fields = {

+ 20 - 3
api/controllers/console/datasets/file.py

@@ -25,12 +25,28 @@ from models.model import UploadFile
 
 cache = TTLCache(maxsize=None, ttl=30)
 
-FILE_SIZE_LIMIT = 15 * 1024 * 1024  # 15MB
 ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx']
 PREVIEW_WORDS_LIMIT = 3000
 
 
 class FileApi(Resource):
+    upload_config_fields = {
+        'file_size_limit': fields.Integer,
+        'batch_count_limit': fields.Integer
+    }
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @marshal_with(upload_config_fields)
+    def get(self):
+        file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT")
+        batch_count_limit = current_app.config.get("UPLOAD_FILE_BATCH_LIMIT")
+        return {
+            'file_size_limit': file_size_limit,
+            'batch_count_limit': batch_count_limit
+        }, 200
+
     file_fields = {
         'id': fields.String,
         'name': fields.String,
@@ -60,8 +76,9 @@ class FileApi(Resource):
         file_content = file.read()
         file_size = len(file_content)
 
-        if file_size > FILE_SIZE_LIMIT:
-            message = "({file_size} > {FILE_SIZE_LIMIT})"
+        file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT") * 1024 * 1024
+        if file_size > file_size_limit:
+            message = f"({file_size} > {file_size_limit})"
             raise FileTooLargeError(message)
 
         extension = file.filename.split('.')[-1]

+ 2 - 2
web/app/components/datasets/create/file-preview/index.tsx

@@ -4,7 +4,7 @@ import { useTranslation } from 'react-i18next'
 import cn from 'classnames'
 import { XMarkIcon } from '@heroicons/react/20/solid'
 import s from './index.module.css'
-import type { File } from '@/models/datasets'
+import type { CustomFile as File } from '@/models/datasets'
 import { fetchFilePreview } from '@/service/common'
 
 type IProps = {
@@ -37,7 +37,7 @@ const FilePreview = ({
   }
 
   useEffect(() => {
-    if (file) {
+    if (file?.id) {
       setLoading(true)
       getPreviewContent(file.id)
     }

+ 56 - 50
web/app/components/datasets/create/file-uploader/index.tsx

@@ -1,21 +1,23 @@
 'use client'
-import React, { useEffect, useRef, useState } from 'react'
+import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useContext } from 'use-context-selector'
 import cn from 'classnames'
+import useSWR from 'swr'
 import s from './index.module.css'
-import type { File as FileEntity } from '@/models/datasets'
+import type { CustomFile as File, FileItem } from '@/models/datasets'
 import { ToastContext } from '@/app/components/base/toast'
 
 import { upload } from '@/service/base'
+import { fetchFileUploadConfig } from '@/service/common'
 
 type IFileUploaderProps = {
-  fileList: any[]
+  fileList: FileItem[]
   titleClassName?: string
-  prepareFileList: (files: any[]) => void
-  onFileUpdate: (fileItem: any, progress: number, list: any[]) => void
+  prepareFileList: (files: FileItem[]) => void
+  onFileUpdate: (fileItem: FileItem, progress: number, list: FileItem[]) => void
   onFileListUpdate?: (files: any) => void
-  onPreview: (file: FileEntity) => void
+  onPreview: (file: File) => void
 }
 
 const ACCEPTS = [
@@ -30,9 +32,6 @@ const ACCEPTS = [
   '.csv',
 ]
 
-const MAX_SIZE = 15 * 1024 * 1024
-const BATCH_COUNT = 5
-
 const FileUploader = ({
   fileList,
   titleClassName,
@@ -48,7 +47,13 @@ const FileUploader = ({
   const dragRef = useRef<HTMLDivElement>(null)
   const fileUploader = useRef<HTMLInputElement>(null)
 
-  const fileListRef = useRef<any>([])
+  const { data: fileUploadConfigResponse } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig)
+  const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
+    file_size_limit: 15,
+    batch_count_limit: 5,
+  }, [fileUploadConfigResponse])
+
+  const fileListRef = useRef<FileItem[]>([])
 
   // utils
   const getFileType = (currentFile: File) => {
@@ -66,21 +71,21 @@ const FileUploader = ({
     return `${(size / 1024 / 1024).toFixed(2)}MB`
   }
 
-  const isValid = (file: File) => {
+  const isValid = useCallback((file: File) => {
     const { size } = file
     const ext = `.${getFileType(file)}`
     const isValidType = ACCEPTS.includes(ext)
     if (!isValidType)
       notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.typeError') })
 
-    const isValidSize = size <= MAX_SIZE
+    const isValidSize = size <= fileUploadConfig.file_size_limit * 1024 * 1024
     if (!isValidSize)
-      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.size') })
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.size', { size: fileUploadConfig.file_size_limit }) })
 
     return isValidType && isValidSize
-  }
+  }, [fileUploadConfig, notify, t])
 
-  const fileUpload = async (fileItem: any) => {
+  const fileUpload = useCallback(async (fileItem: FileItem): Promise<FileItem> => {
     const formData = new FormData()
     formData.append('file', fileItem.file)
     const onProgress = (e: ProgressEvent) => {
@@ -90,19 +95,19 @@ const FileUploader = ({
       }
     }
 
+    const fileListCopy = fileListRef.current
     return upload({
       xhr: new XMLHttpRequest(),
       data: formData,
       onprogress: onProgress,
     })
-      .then((res: FileEntity) => {
-        const fileListCopy = fileListRef.current
-
+      .then((res: File) => {
         const completeFile = {
           fileID: fileItem.fileID,
           file: res,
+          progress: -1,
         }
-        const index = fileListCopy.findIndex((item: any) => item.fileID === fileItem.fileID)
+        const index = fileListCopy.findIndex(item => item.fileID === fileItem.fileID)
         fileListCopy[index] = completeFile
         onFileUpdate(completeFile, 100, fileListCopy)
         return Promise.resolve({ ...completeFile })
@@ -113,42 +118,44 @@ const FileUploader = ({
         return Promise.resolve({ ...fileItem })
       })
       .finally()
-  }
-  const uploadBatchFiles = (bFiles: any) => {
-    bFiles.forEach((bf: any) => (bf.progress = 0))
-    return Promise.all(bFiles.map((bFile: any) => fileUpload(bFile)))
-  }
-  const uploadMultipleFiles = async (files: any) => {
+  }, [fileListRef, notify, onFileUpdate, t])
+
+  const uploadBatchFiles = useCallback((bFiles: FileItem[]) => {
+    bFiles.forEach(bf => (bf.progress = 0))
+    return Promise.all(bFiles.map(fileUpload))
+  }, [fileUpload])
+
+  const uploadMultipleFiles = useCallback(async (files: FileItem[]) => {
+    const batchCountLimit = fileUploadConfig.batch_count_limit
     const length = files.length
     let start = 0
     let end = 0
 
     while (start < length) {
-      if (start + BATCH_COUNT > length)
+      if (start + batchCountLimit > length)
         end = length
       else
-        end = start + BATCH_COUNT
+        end = start + batchCountLimit
       const bFiles = files.slice(start, end)
       await uploadBatchFiles(bFiles)
       start = end
     }
-  }
-  const initialUpload = (files: any) => {
+  }, [fileUploadConfig, uploadBatchFiles])
+
+  const initialUpload = useCallback((files: File[]) => {
     if (!files.length)
       return false
-    const preparedFiles = files.map((file: any, index: number) => {
-      const fileItem = {
-        fileID: `file${index}-${Date.now()}`,
-        file,
-        progress: -1,
-      }
-      return fileItem
-    })
+    const preparedFiles = files.map((file, index) => ({
+      fileID: `file${index}-${Date.now()}`,
+      file,
+      progress: -1,
+    }))
     const newFiles = [...fileListRef.current, ...preparedFiles]
     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }
+  }, [prepareFileList, uploadMultipleFiles])
+
   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()
     e.stopPropagation()
@@ -164,18 +171,17 @@ const FileUploader = ({
     e.target === dragRef.current && setDragging(false)
   }
 
-  const handleDrop = (e: DragEvent) => {
+  const handleDrop = useCallback((e: DragEvent) => {
     e.preventDefault()
     e.stopPropagation()
     setDragging(false)
     if (!e.dataTransfer)
       return
 
-    const files = [...e.dataTransfer.files]
-    const validFiles = files.filter(file => isValid(file))
-    // fileUpload(files[0])
+    const files = [...e.dataTransfer.files] as File[]
+    const validFiles = files.filter(isValid)
     initialUpload(validFiles)
-  }
+  }, [initialUpload, isValid])
 
   const selectHandle = () => {
     if (fileUploader.current)
@@ -186,13 +192,13 @@ const FileUploader = ({
     if (fileUploader.current)
       fileUploader.current.value = ''
 
-    fileListRef.current = fileListRef.current.filter((item: any) => item.fileID !== fileID)
+    fileListRef.current = fileListRef.current.filter(item => item.fileID !== fileID)
     onFileListUpdate?.([...fileListRef.current])
   }
-  const fileChangeHandle = (e: React.ChangeEvent<HTMLInputElement>) => {
-    const files = [...(e.target.files ?? [])].filter(file => isValid(file))
-    initialUpload(files)
-  }
+  const fileChangeHandle = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
+    const files = [...(e.target.files ?? [])] as File[]
+    initialUpload(files.filter(isValid))
+  }, [isValid, initialUpload])
 
   useEffect(() => {
     dropRef.current?.addEventListener('dragenter', handleDragEnter)
@@ -205,7 +211,7 @@ const FileUploader = ({
       dropRef.current?.removeEventListener('dragleave', handleDragLeave)
       dropRef.current?.removeEventListener('drop', handleDrop)
     }
-  }, [])
+  }, [handleDrop])
 
   return (
     <div className={s.fileUploader}>
@@ -225,7 +231,7 @@ const FileUploader = ({
           <span>{t('datasetCreation.stepOne.uploader.button')}</span>
           <label className={s.browse} onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.browse')}</label>
         </div>
-        <div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip')}</div>
+        <div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip', { size: fileUploadConfig.file_size_limit })}</div>
         {dragging && <div ref={dragRef} className={s.draggingCover}/>}
       </div>
       <div className={s.fileList}>

+ 5 - 5
web/app/components/datasets/create/index.tsx

@@ -8,7 +8,7 @@ import StepOne from './step-one'
 import StepTwo from './step-two'
 import StepThree from './step-three'
 import { DataSourceType } from '@/models/datasets'
-import type { DataSet, createDocumentResponse } from '@/models/datasets'
+import type { DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
 import { fetchDataSource, fetchTenantInfo } from '@/service/common'
 import { fetchDataDetail } from '@/service/datasets'
 import type { DataSourceNotionPage } from '@/models/common'
@@ -30,7 +30,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
   const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
   const [step, setStep] = useState(1)
   const [indexingTypeCache, setIndexTypeCache] = useState('')
-  const [fileList, setFiles] = useState<any[]>([])
+  const [fileList, setFiles] = useState<FileItem[]>([])
   const [result, setResult] = useState<createDocumentResponse | undefined>()
   const [hasError, setHasError] = useState(false)
 
@@ -39,12 +39,12 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
     setNotionPages(value)
   }
 
-  const updateFileList = (preparedFiles: any) => {
+  const updateFileList = (preparedFiles: FileItem[]) => {
     setFiles(preparedFiles)
   }
 
-  const updateFile = (fileItem: any, progress: number, list: any[]) => {
-    const targetIndex = list.findIndex((file: any) => file.fileID === fileItem.fileID)
+  const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
+    const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
     list[targetIndex] = {
       ...list[targetIndex],
       progress,

+ 4 - 4
web/app/components/datasets/create/step-one/index.tsx

@@ -7,7 +7,7 @@ import FileUploader from '../file-uploader'
 import NotionPagePreview from '../notion-page-preview'
 import EmptyDatasetCreationModal from '../empty-dataset-creation-modal'
 import s from './index.module.css'
-import type { File } from '@/models/datasets'
+import type { FileItem } from '@/models/datasets'
 import type { DataSourceNotionPage } from '@/models/common'
 import { DataSourceType } from '@/models/datasets'
 import Button from '@/app/components/base/button'
@@ -20,9 +20,9 @@ type IStepOneProps = {
   dataSourceTypeDisable: Boolean
   hasConnection: boolean
   onSetting: () => void
-  files: any[]
-  updateFileList: (files: any[]) => void
-  updateFile: (fileItem: any, progress: number, list: any[]) => void
+  files: FileItem[]
+  updateFileList: (files: FileItem[]) => void
+  updateFile: (fileItem: FileItem, progress: number, list: FileItem[]) => void
   notionPages?: any[]
   updateNotionPages: (value: any[]) => void
   onStepChange: () => void

+ 2 - 2
web/app/components/datasets/create/step-two/index.tsx

@@ -9,7 +9,7 @@ import Link from 'next/link'
 import { groupBy } from 'lodash-es'
 import PreviewItem, { PreviewType } from './preview-item'
 import s from './index.module.css'
-import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
+import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
 import {
   createDocument,
   createFirstDocument,
@@ -39,7 +39,7 @@ type StepTwoProps = {
   datasetId?: string
   indexingType?: string
   dataSourceType: DataSourceType
-  files: File[]
+  files: CustomFile[]
   notionPages?: Page[]
   onStepChange?: (delta: number) => void
   updateIndexingTypeCache?: (type: string) => void

+ 2 - 2
web/i18n/lang/dataset-creation.en.ts

@@ -23,10 +23,10 @@ const translation = {
       title: 'Upload text file',
       button: 'Drag and drop file, or',
       browse: 'Browse',
-      tip: 'Supports txt, html, markdown, xlsx, and pdf. Max 15MB each.',
+      tip: 'Supports txt, html, markdown, xlsx, and pdf. Max {{size}}MB each.',
       validation: {
         typeError: 'File type not supported',
-        size: 'File too large. Maximum is 15MB',
+        size: 'File too large. Maximum is {{size}}MB',
         count: 'Multiple files not supported',
       },
       cancel: 'Cancel',

+ 2 - 2
web/i18n/lang/dataset-creation.zh.ts

@@ -23,10 +23,10 @@ const translation = {
       title: '上传文本文件',
       button: '拖拽文件至此,或者',
       browse: '选择文件',
-      tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 15 MB。',
+      tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 {{size}}MB。',
       validation: {
         typeError: '文件类型不支持',
-        size: '文件太大了,不能超过 15MB',
+        size: '文件太大了,不能超过 {{size}}MB',
         count: '暂不支持多个文件',
       },
       cancel: '取消',

+ 5 - 0
web/models/common.ts

@@ -168,3 +168,8 @@ export type PluginProvider = {
     api_key: string
   } | null
 }
+
+export type FileUploadConfigResponse = {
+  file_size_limit: number
+  batch_count_limit: number
+}

+ 12 - 8
web/models/datasets.ts

@@ -24,14 +24,18 @@ export type DataSet = {
   word_count: number
 }
 
-export type File = {
-  id: string
-  name: string
-  size: number
-  extension: string
-  mime_type: string
-  created_by: string
-  created_at: number
+export type CustomFile = File & {
+  id?: string
+  extension?: string
+  mime_type?: string
+  created_by?: string
+  created_at?: number
+}
+
+export type FileItem = {
+  fileID: string
+  file: CustomFile
+  progress: number
 }
 
 export type DataSetListResponse = {

+ 5 - 0
web/service/common.ts

@@ -2,6 +2,7 @@ import type { Fetcher } from 'swr'
 import { del, get, patch, post, put } from './base'
 import type {
   AccountIntegrate, CommonResponse, DataSourceNotion,
+  FileUploadConfigResponse,
   ICurrentWorkspace,
   IWorkspace, LangGeniusVersionResponse, Member,
   OauthResponse, PluginProvider, Provider, ProviderAnthropicToken, ProviderAzureToken,
@@ -178,3 +179,7 @@ export const updateDefaultModel: Fetcher<CommonResponse, { url: string; body: an
 export const submitFreeQuota: Fetcher<{ type: string; redirect_url?: string; result?: string }, string> = (url) => {
   return post(url) as Promise<{ type: string; redirect_url?: string; result?: string }>
 }
+
+export const fetchFileUploadConfig: Fetcher<FileUploadConfigResponse, { url: string }> = ({ url }) => {
+  return get(url) as Promise<FileUploadConfigResponse>
+}