
Feat: Add documents limitation (#2662)

Garfield Dai 1 year ago
parent
commit
8e66b96221

+ 2 - 1
api/controllers/console/datasets/file.py

@@ -11,7 +11,7 @@ from controllers.console.datasets.error import (
     UnsupportedFileTypeError,
 )
 from controllers.console.setup import setup_required
-from controllers.console.wraps import account_initialization_required
+from controllers.console.wraps import account_initialization_required, cloud_edition_billing_resource_check
 from fields.file_fields import file_fields, upload_config_fields
 from libs.login import login_required
 from services.file_service import ALLOWED_EXTENSIONS, UNSTRUSTURED_ALLOWED_EXTENSIONS, FileService
@@ -39,6 +39,7 @@ class FileApi(Resource):
     @login_required
     @account_initialization_required
     @marshal_with(file_fields)
+    @cloud_edition_billing_resource_check(resource='documents')
     def post(self):
 
         # get file from request
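
For context, a minimal sketch of how a client would hit this endpoint once the check is in place; the base URL, mount path, and auth header are assumptions, but the source=datasets query parameter and the 403 on an exhausted quota follow from the wraps.py change below.

    import requests  # illustrative client, not part of this change

    # The upload endpoint only enforces the documents quota when the request
    # declares it comes from a dataset import (source=datasets).
    resp = requests.post(
        'https://dify.example.com/console/api/files/upload',  # assumed mount path
        params={'source': 'datasets'},
        headers={'Authorization': 'Bearer <console-session-token>'},  # assumed auth scheme
        files={'file': open('handbook.pdf', 'rb')},
    )
    if resp.status_code == 403:
        print('documents upload quota exhausted for this plan')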

+ 8 - 0
api/controllers/console/wraps.py

@@ -56,6 +56,7 @@ def cloud_edition_billing_resource_check(resource: str,
                 members = features.members
                 apps = features.apps
                 vector_space = features.vector_space
+                documents_upload_quota = features.documents_upload_quota
                 annotation_quota_limit = features.annotation_quota_limit
 
                 if resource == 'members' and 0 < members.limit <= members.size:
@@ -64,6 +65,13 @@ def cloud_edition_billing_resource_check(resource: str,
                     abort(403, error_msg)
                 elif resource == 'vector_space' and 0 < vector_space.limit <= vector_space.size:
                     abort(403, error_msg)
+                elif resource == 'documents' and 0 < documents_upload_quota.limit <= documents_upload_quota.size:
+                    # The file upload API is used in multiple places, so we need to check whether the request comes from datasets
+                    source = request.args.get('source')
+                    if source == 'datasets':
+                        abort(403, error_msg)
+                    else:
+                        return view(*args, **kwargs)
                 elif resource == 'workspace_custom' and not features.can_replace_logo:
                     abort(403, error_msg)
                 elif resource == 'annotation' and 0 < annotation_quota_limit.limit < annotation_quota_limit.size:
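
The guard `0 < documents_upload_quota.limit <= documents_upload_quota.size` reads as: a limit of 0 means the quota is not enforced, and once the current size reaches the limit, further dataset uploads are blocked. A minimal sketch of that predicate (the helper name is illustrative):

    def quota_exhausted(limit: int, size: int) -> bool:
        # 0 (or negative) limit -> unlimited; otherwise exhausted once size >= limit
        return 0 < limit <= size

    assert quota_exhausted(50, 50)          # at the cap: block dataset uploads
    assert not quota_exhausted(50, 12)      # headroom remains
    assert not quota_exhausted(0, 9999)     # limit 0 means no enforced limit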

+ 2 - 0
api/controllers/service_api/dataset/document.py

@@ -28,6 +28,7 @@ class DocumentAddByTextApi(DatasetApiResource):
     """Resource for documents."""
 
     @cloud_edition_billing_resource_check('vector_space', 'dataset')
+    @cloud_edition_billing_resource_check('documents', 'dataset')
     def post(self, tenant_id, dataset_id):
         """Create document by text."""
         parser = reqparse.RequestParser()
@@ -153,6 +154,7 @@ class DocumentUpdateByTextApi(DatasetApiResource):
 class DocumentAddByFileApi(DatasetApiResource):
     """Resource for documents."""
     @cloud_edition_billing_resource_check('vector_space', 'dataset')
+    @cloud_edition_billing_resource_check('documents', 'dataset')
     def post(self, tenant_id, dataset_id):
         """Create document by upload file."""
         args = {}

+ 3 - 0
api/controllers/service_api/wraps.py

@@ -89,6 +89,7 @@ def cloud_edition_billing_resource_check(resource: str,
                 members = features.members
                 apps = features.apps
                 vector_space = features.vector_space
+                documents_upload_quota = features.documents_upload_quota
 
                 if resource == 'members' and 0 < members.limit <= members.size:
                     raise Unauthorized(error_msg)
@@ -96,6 +97,8 @@ def cloud_edition_billing_resource_check(resource: str,
                     raise Unauthorized(error_msg)
                 elif resource == 'vector_space' and 0 < vector_space.limit <= vector_space.size:
                     raise Unauthorized(error_msg)
+                elif resource == 'documents' and 0 < documents_upload_quota.limit <= documents_upload_quota.size:
+                    raise Unauthorized(error_msg)
                 else:
                     return view(*args, **kwargs)
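
Read together with the console variant above, the pattern is the same decorator factory: resolve the tenant's feature limits, compare size against limit, and either short-circuit with an error or fall through to the view; the console API aborts with 403 while the service API raises Unauthorized. A simplified sketch under assumed helper names (get_tenant_features stands in for the FeatureService lookup and is hypothetical):

    from functools import wraps
    from werkzeug.exceptions import Unauthorized

    def billing_resource_check(resource: str, api_token_type: str = 'dataset',
                               error_msg: str = 'Resource limit reached, please upgrade your plan.'):
        # Simplified sketch of the decorator factory; not the repository's exact code.
        def interceptor(view):
            @wraps(view)
            def decorated(*args, **kwargs):
                features = get_tenant_features()  # hypothetical helper wrapping FeatureService
                if resource == 'documents':
                    quota = features.documents_upload_quota
                    if 0 < quota.limit <= quota.size:
                        raise Unauthorized(error_msg)
                return view(*args, **kwargs)
            return decorated
        return interceptor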
 

+ 12 - 1
api/services/dataset_service.py

@@ -37,7 +37,7 @@ from services.errors.account import NoPermissionError
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
-from services.feature_service import FeatureService
+from services.feature_service import FeatureModel, FeatureService
 from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
@@ -469,6 +469,9 @@ class DocumentService:
                 batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
                 if count > batch_upload_limit:
                     raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
+                DocumentService.check_documents_upload_quota(count, features)
+
         # if dataset is empty, update dataset data_source_type
         if not dataset.data_source_type:
             dataset.data_source_type = document_data["data_source"]["type"]
@@ -619,6 +622,12 @@ class DocumentService:
 
         return documents, batch
 
+    @staticmethod
+    def check_documents_upload_quota(count: int, features: FeatureModel):
+        can_upload_size = features.documents_upload_quota.limit - features.documents_upload_quota.size
+        if count > can_upload_size:
+            raise ValueError(f'You have reached the limit of your subscription. Only {can_upload_size} documents can be uploaded.')
+
     @staticmethod
     def build_document(dataset: Dataset, process_rule_id: str, data_source_type: str, document_form: str,
                        document_language: str, data_source_info: dict, created_from: str, position: int,
@@ -763,6 +772,8 @@ class DocumentService:
             if count > batch_upload_limit:
                 raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
 
+            DocumentService.check_documents_upload_quota(count, features)
+
         embedding_model = None
         dataset_collection_binding_id = None
         retrieval_model = None
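
As a usage sketch, the new helper compares a batch size against the remaining allowance and raises before any document is created; the FeatureModel values below are illustrative (defaults come from feature_service.py):

    from services.dataset_service import DocumentService
    from services.feature_service import FeatureModel

    features = FeatureModel()
    features.documents_upload_quota.limit = 50
    features.documents_upload_quota.size = 48   # 48 documents already counted against the plan

    DocumentService.check_documents_upload_quota(2, features)   # ok: exactly 2 slots remain
    DocumentService.check_documents_upload_quota(5, features)   # raises ValueError: only 2 documents can be uploaded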

+ 4 - 0
api/services/feature_service.py

@@ -25,6 +25,7 @@ class FeatureModel(BaseModel):
     apps: LimitationModel = LimitationModel(size=0, limit=10)
     vector_space: LimitationModel = LimitationModel(size=0, limit=5)
     annotation_quota_limit: LimitationModel = LimitationModel(size=0, limit=10)
+    documents_upload_quota: LimitationModel = LimitationModel(size=0, limit=50)
     docs_processing: str = 'standard'
     can_replace_logo: bool = False
 
@@ -63,6 +64,9 @@ class FeatureService:
         features.vector_space.size = billing_info['vector_space']['size']
         features.vector_space.limit = billing_info['vector_space']['limit']
 
+        features.documents_upload_quota.size = billing_info['documents_upload_quota']['size']
+        features.documents_upload_quota.limit = billing_info['documents_upload_quota']['limit']
+
         features.annotation_quota_limit.size = billing_info['annotation_quota_limit']['size']
         features.annotation_quota_limit.limit = billing_info['annotation_quota_limit']['limit']
 

+ 4 - 0
web/app/components/billing/config.ts

@@ -16,6 +16,7 @@ export const ALL_PLANS: Record<Plan, PlanInfo> = {
     teamMembers: 1,
     buildApps: 10,
     vectorSpace: 5,
+    documentsUploadQuota: 50,
     documentProcessingPriority: Priority.standard,
     logHistory: 30,
     customTools: unAvailable,
@@ -32,6 +33,7 @@ export const ALL_PLANS: Record<Plan, PlanInfo> = {
     teamMembers: 3,
     buildApps: 50,
     vectorSpace: 200,
+    documentsUploadQuota: 500,
     documentProcessingPriority: Priority.priority,
     logHistory: NUM_INFINITE,
     customTools: 10,
@@ -48,6 +50,7 @@ export const ALL_PLANS: Record<Plan, PlanInfo> = {
     teamMembers: NUM_INFINITE,
     buildApps: NUM_INFINITE,
     vectorSpace: 1000,
+    documentsUploadQuota: 1000,
     documentProcessingPriority: Priority.topPriority,
     logHistory: NUM_INFINITE,
     customTools: NUM_INFINITE,
@@ -64,6 +67,7 @@ export const ALL_PLANS: Record<Plan, PlanInfo> = {
     teamMembers: NUM_INFINITE,
     buildApps: NUM_INFINITE,
     vectorSpace: NUM_INFINITE,
+    documentsUploadQuota: NUM_INFINITE,
     documentProcessingPriority: Priority.topPriority,
     logHistory: NUM_INFINITE,
     customTools: NUM_INFINITE,

+ 7 - 0
web/app/components/billing/pricing/plan-item.tsx

@@ -129,6 +129,9 @@ const PlanItem: FC<Props> = ({
             <div className='mt-3.5 flex items-center space-x-1'>
               <div>+ {t('billing.plansCommon.supportItems.logoChange')}</div>
             </div>
+            <div className='mt-3.5 flex items-center space-x-1'>
+              <div>+ {t('billing.plansCommon.supportItems.bulkUpload')}</div>
+            </div>
             <div className='mt-3.5 flex items-center space-x-1'>
               <div className='flex items-center'>
                 +
@@ -264,6 +267,10 @@ const PlanItem: FC<Props> = ({
           value={planInfo.vectorSpace === NUM_INFINITE ? t('billing.plansCommon.unlimited') as string : (planInfo.vectorSpace >= 1000 ? `${planInfo.vectorSpace / 1000}G` : `${planInfo.vectorSpace}MB`)}
           tooltip={t('billing.plansCommon.vectorSpaceBillingTooltip') as string}
         />
+        <KeyValue
+          label={t('billing.plansCommon.documentsUploadQuota')}
+          value={planInfo.documentsUploadQuota === NUM_INFINITE ? t('billing.plansCommon.unlimited') as string : planInfo.documentsUploadQuota}
+        />
         <KeyValue
           label={t('billing.plansCommon.documentProcessingPriority')}
           value={t(`billing.plansCommon.priority.${planInfo.documentProcessingPriority}`) as string}

+ 1 - 0
web/app/components/billing/type.ts

@@ -17,6 +17,7 @@ export type PlanInfo = {
   teamMembers: number
   buildApps: number
   vectorSpace: number
+  documentsUploadQuota: number
   documentProcessingPriority: Priority
   logHistory: number
   customTools: string | number

+ 34 - 25
web/app/components/datasets/create/file-uploader/index.tsx

@@ -23,6 +23,7 @@ type IFileUploaderProps = {
   onFileUpdate: (fileItem: FileItem, progress: number, list: FileItem[]) => void
   onFileListUpdate?: (files: FileItem[]) => void
   onPreview: (file: File) => void
+  notSupportBatchUpload?: boolean
 }
 
 const FileUploader = ({
@@ -32,6 +33,7 @@ const FileUploader = ({
   onFileUpdate,
   onFileListUpdate,
   onPreview,
+  notSupportBatchUpload,
 }: IFileUploaderProps) => {
   const { t } = useTranslation()
   const { notify } = useContext(ToastContext)
@@ -40,6 +42,7 @@ const FileUploader = ({
   const dropRef = useRef<HTMLDivElement>(null)
   const dragRef = useRef<HTMLDivElement>(null)
   const fileUploader = useRef<HTMLInputElement>(null)
+  const hideUpload = notSupportBatchUpload && fileList.length > 0
 
   const { data: fileUploadConfigResponse } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig)
   const { data: supportFileTypesResponse } = useSWR({ url: '/files/support-type' }, fetchSupportFileTypes)
@@ -131,7 +134,7 @@ const FileUploader = ({
       xhr: new XMLHttpRequest(),
       data: formData,
       onprogress: onProgress,
-    })
+    }, false, undefined, '?source=datasets')
       .then((res: File) => {
         const completeFile = {
           fileID: fileItem.fileID,
@@ -143,8 +146,8 @@ const FileUploader = ({
         onFileUpdate(completeFile, 100, fileListCopy)
         return Promise.resolve({ ...completeFile })
       })
-      .catch(() => {
-        notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.failed') })
+      .catch((e) => {
+        notify({ type: 'error', message: e?.response?.code === 'forbidden' ? e?.response?.message : t('datasetCreation.stepOne.uploader.failed') })
         onFileUpdate(fileItem, -2, fileListCopy)
         return Promise.resolve({ ...fileItem })
       })
@@ -252,30 +255,36 @@ const FileUploader = ({
 
   return (
     <div className={s.fileUploader}>
-      <input
-        ref={fileUploader}
-        id="fileUploader"
-        style={{ display: 'none' }}
-        type="file"
-        multiple
-        accept={ACCEPTS.join(',')}
-        onChange={fileChangeHandle}
-      />
+      {!hideUpload && (
+        <input
+          ref={fileUploader}
+          id="fileUploader"
+          style={{ display: 'none' }}
+          type="file"
+          multiple={!notSupportBatchUpload}
+          accept={ACCEPTS.join(',')}
+          onChange={fileChangeHandle}
+        />
+      )}
+
       <div className={cn(s.title, titleClassName)}>{t('datasetCreation.stepOne.uploader.title')}</div>
-      <div ref={dropRef} className={cn(s.uploader, dragging && s.dragging)}>
-        <div className='flex justify-center items-center min-h-6 mb-2'>
-          <span className={s.uploadIcon} />
-          <span>
-            {t('datasetCreation.stepOne.uploader.button')}
-            <label className={s.browse} onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.browse')}</label>
-          </span>
+      {!hideUpload && (
+
+        <div ref={dropRef} className={cn(s.uploader, dragging && s.dragging)}>
+          <div className='flex justify-center items-center min-h-6 mb-2'>
+            <span className={s.uploadIcon} />
+            <span>
+              {t('datasetCreation.stepOne.uploader.button')}
+              <label className={s.browse} onClick={selectHandle}>{t('datasetCreation.stepOne.uploader.browse')}</label>
+            </span>
+          </div>
+          <div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip', {
+            size: fileUploadConfig.file_size_limit,
+            supportTypes: supportTypesShowNames,
+          })}</div>
+          {dragging && <div ref={dragRef} className={s.draggingCover} />}
         </div>
-        <div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip', {
-          size: fileUploadConfig.file_size_limit,
-          supportTypes: supportTypesShowNames,
-        })}</div>
-        {dragging && <div ref={dragRef} className={s.draggingCover} />}
-      </div>
+      )}
       <div className={s.fileList}>
         {fileList.map((fileItem, index) => (
           <div

+ 3 - 2
web/app/components/datasets/create/step-one/index.tsx

@@ -39,7 +39,7 @@ export const NotionConnector = ({ onSetting }: NotionConnectorProps) => {
 
   return (
     <div className={s.notionConnectionTip}>
-      <span className={s.notionIcon}/>
+      <span className={s.notionIcon} />
       <div className={s.title}>{t('datasetCreation.stepOne.notionSyncTitle')}</div>
       <div className={s.tip}>{t('datasetCreation.stepOne.notionSyncTip')}</div>
       <Button className='h-8' type='primary' onClick={onSetting}>{t('datasetCreation.stepOne.connect')}</Button>
@@ -92,7 +92,7 @@ const StepOne = ({
   const hasNotin = notionPages.length > 0
   const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace
   const isShowVectorSpaceFull = (allFileLoaded || hasNotin) && isVectorSpaceFull && enableBilling
-
+  const notSupportBatchUpload = enableBilling && plan.type === 'sandbox'
   const nextDisabled = useMemo(() => {
     if (!files.length)
       return true
@@ -169,6 +169,7 @@ const StepOne = ({
                 onFileListUpdate={updateFileList}
                 onFileUpdate={updateFile}
                 onPreview={updateCurrentFile}
+                notSupportBatchUpload={notSupportBatchUpload}
               />
               {isShowVectorSpaceFull && (
                 <div className='max-w-[640px] mb-4'>

+ 2 - 0
web/i18n/en-US/billing.ts

@@ -32,6 +32,7 @@ const translation = {
     vectorSpace: 'Vector Space',
     vectorSpaceBillingTooltip: 'Each 1MB can store about 1.2million characters of vectorized data(estimated using OpenAI Embeddings, varies across models).',
     vectorSpaceTooltip: 'Vector Space is the long-term memory system required for LLMs  to comprehend your data.',
+    documentsUploadQuota: 'Documents Upload Quota',
     documentProcessingPriority: 'Document Processing Priority',
     documentProcessingPriorityTip: 'For higher document processing priority, please upgrade your plan.',
     documentProcessingPriorityUpgrade: 'Process more data with higher accuracy at faster speeds.',
@@ -56,6 +57,7 @@ const translation = {
       dedicatedAPISupport: 'Dedicated API support',
       customIntegration: 'Custom integration and support',
       ragAPIRequest: 'RAG API Requests',
+      bulkUpload: 'Bulk upload documents',
       agentMode: 'Agent Mode',
       workflow: 'Workflow',
     },

+ 2 - 0
web/i18n/zh-Hans/billing.ts

@@ -32,6 +32,7 @@ const translation = {
     vectorSpace: '向量空间',
     vectorSpaceTooltip: '向量空间是 LLMs 理解您的数据所需的长期记忆系统。',
     vectorSpaceBillingTooltip: '向量存储是将知识库向量化处理后为让 LLMs 理解数据而使用的长期记忆存储,1MB 大约能满足1.2 million character 的向量化后数据存储(以 OpenAI Embedding 模型估算,不同模型计算方式有差异)。在向量化过程中,实际的压缩或尺寸减小取决于内容的复杂性和冗余性。',
+    documentsUploadQuota: '文档上传配额',
     documentProcessingPriority: '文档处理优先级',
     documentProcessingPriorityTip: '如需更高的文档处理优先级,请升级您的套餐',
     documentProcessingPriorityUpgrade: '以更快的速度、更高的精度处理更多的数据。',
@@ -56,6 +57,7 @@ const translation = {
       dedicatedAPISupport: '专用 API 支持',
       customIntegration: '自定义集成和支持',
       ragAPIRequest: 'RAG API 请求',
+      bulkUpload: '批量上传文档',
       agentMode: '代理模式',
       workflow: '工作流',
     },

+ 2 - 2
web/service/base.ts

@@ -308,7 +308,7 @@ const baseFetch = <T>(
   ]) as Promise<T>
 }
 
-export const upload = (options: any, isPublicAPI?: boolean, url?: string): Promise<any> => {
+export const upload = (options: any, isPublicAPI?: boolean, url?: string, searchParams?: string): Promise<any> => {
   const urlPrefix = isPublicAPI ? PUBLIC_API_PREFIX : API_PREFIX
   let token = ''
   if (isPublicAPI) {
@@ -329,7 +329,7 @@ export const upload = (options: any, isPublicAPI?: boolean, url?: string): Promi
   }
   const defaultOptions = {
     method: 'POST',
-    url: url ? `${urlPrefix}${url}` : `${urlPrefix}/files/upload`,
+    url: (url ? `${urlPrefix}${url}` : `${urlPrefix}/files/upload`) + (searchParams || ''),
     headers: {
       Authorization: `Bearer ${token}`,
     },