
Fix/upload limit (#2521)

Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
Jyong 1 year ago
parent
commit
97fe817186

+ 2 - 0
api/.env.example

@@ -130,3 +130,5 @@ UNSTRUCTURED_API_URL=
 
 SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
+
+BATCH_UPLOAD_LIMIT=10

+ 3 - 0
api/config.py

@@ -56,6 +56,7 @@ DEFAULTS = {
     'BILLING_ENABLED': 'False',
     'CAN_REPLACE_LOGO': 'False',
     'ETL_TYPE': 'dify',
+    'BATCH_UPLOAD_LIMIT': 20
 }
 
 
@@ -285,6 +286,8 @@ class Config:
         self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
         self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
 
+        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
+
 
 class CloudEditionConfig(Config):
 

+ 17 - 0
api/core/indexing_runner.py

@@ -32,6 +32,7 @@ from models.dataset import Dataset, DatasetProcessRule, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from models.model import UploadFile
 from models.source import DataSourceBinding
+from services.feature_service import FeatureService
 
 
 class IndexingRunner:
@@ -244,6 +245,14 @@ class IndexingRunner:
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(file_details)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
@@ -361,6 +370,14 @@ class IndexingRunner:
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(notion_info_list)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(

+ 7 - 0
api/services/annotation_service.py

@@ -10,6 +10,7 @@ from werkzeug.exceptions import NotFound
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
+from services.feature_service import FeatureService
 from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
 from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
 from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
@@ -284,6 +285,12 @@ class AppAnnotationService:
                 result.append(content)
             if len(result) == 0:
                 raise ValueError("The CSV file is empty.")
+            # check annotation limit
+            features = FeatureService.get_features(current_user.current_tenant_id)
+            if features.billing.enabled:
+                annotation_quota_limit = features.annotation_quota_limit
+                if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
+                    raise ValueError("The number of annotations exceeds the limit of your subscription.")
             # async job
             job_id = str(uuid.uuid4())
             indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
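
For clarity, the quota comparison above rejects the import when the CSV rows plus the annotations already stored would exceed the plan limit; a standalone sketch of the same arithmetic (function and argument names are hypothetical):

    def exceeds_annotation_quota(existing_size: int, csv_rows: int, limit: int) -> bool:
        # mirrors: annotation_quota_limit.limit < len(result) + annotation_quota_limit.size
        return limit < csv_rows + existing_size

    # e.g. a plan limit of 50 with 45 stored annotations rejects a 10-row CSV
    assert exceeds_annotation_quota(existing_size=45, csv_rows=10, limit=50)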

+ 22 - 10
api/services/dataset_service.py

@@ -36,6 +36,7 @@ from services.errors.account import NoPermissionError
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
+from services.feature_service import FeatureService
 from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
@@ -452,7 +453,9 @@ class DocumentService:
                                       created_from: str = 'web'):
 
         # check document limit
-        if current_app.config['EDITION'] == 'CLOUD':
+        features = FeatureService.get_features(current_user.current_tenant_id)
+
+        if features.billing.enabled:
             if 'original_document_id' not in document_data or not document_data['original_document_id']:
                 count = 0
                 if document_data["data_source"]["type"] == "upload_file":
@@ -462,6 +465,9 @@ class DocumentService:
                     notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                     for notion_info in notion_info_list:
                         count = count + len(notion_info['pages'])
+                batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+                if count > batch_upload_limit:
+                    raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
         # if dataset is empty, update dataset data_source_type
         if not dataset.data_source_type:
             dataset.data_source_type = document_data["data_source"]["type"]
@@ -741,14 +747,20 @@ class DocumentService:
 
     @staticmethod
     def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
-        count = 0
-        if document_data["data_source"]["type"] == "upload_file":
-            upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
-            count = len(upload_file_list)
-        elif document_data["data_source"]["type"] == "notion_import":
-            notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
-            for notion_info in notion_info_list:
-                count = count + len(notion_info['pages'])
+        features = FeatureService.get_features(current_user.current_tenant_id)
+
+        if features.billing.enabled:
+            count = 0
+            if document_data["data_source"]["type"] == "upload_file":
+                upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
+                count = len(upload_file_list)
+            elif document_data["data_source"]["type"] == "notion_import":
+                notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
+                for notion_info in notion_info_list:
+                    count = count + len(notion_info['pages'])
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
 
         embedding_model = None
         dataset_collection_binding_id = None
@@ -1139,7 +1151,7 @@ class SegmentService:
                     segment.answer = args['answer']
                 if 'keywords' in args and args['keywords']:
                     segment.keywords = args['keywords']
-                if'enabled' in args and args['enabled'] is not None:
+                if 'enabled' in args and args['enabled'] is not None:
                     segment.enabled = args['enabled']
                 db.session.add(segment)
                 db.session.commit()

+ 2 - 2
api/services/file_service.py

@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
 
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
+ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
 UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
-                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
+                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
 PREVIEW_WORDS_LIMIT = 3000
 
 

+ 32 - 1
api/tasks/document_indexing_task.py

@@ -4,10 +4,12 @@ import time
 
 import click
 from celery import shared_task
+from flask import current_app
 
 from core.indexing_runner import DocumentIsPausedException, IndexingRunner
 from extensions.ext_database import db
-from models.dataset import Document
+from models.dataset import Dataset, Document
+from services.feature_service import FeatureService
 
 
 @shared_task(queue='dataset')
@@ -21,6 +23,35 @@ def document_indexing_task(dataset_id: str, document_ids: list):
     """
     documents = []
     start_at = time.perf_counter()
+
+    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+
+    # check document limit
+    features = FeatureService.get_features(dataset.tenant_id)
+    try:
+        if features.billing.enabled:
+            vector_space = features.vector_space
+            count = len(document_ids)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+            if 0 < vector_space.limit <= vector_space.size:
+                raise ValueError("Your total number of documents plus the number of uploads have over the limit of "
+                                 "your subscription.")
+    except Exception as e:
+        for document_id in document_ids:
+            document = db.session.query(Document).filter(
+                Document.id == document_id,
+                Document.dataset_id == dataset_id
+            ).first()
+            if document:
+                document.indexing_status = 'error'
+                document.error = str(e)
+                document.stopped_at = datetime.datetime.utcnow()
+                db.session.add(document)
+        db.session.commit()
+        return
+
     for document_id in document_ids:
         logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))
 

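The guard added to this task combines two billing checks: the per-batch document count against BATCH_UPLOAD_LIMIT, and the tenant's vector space usage against its plan limit. A minimal, self-contained sketch of that decision logic (names and wording are illustrative, not the service's API):

    def check_billing_limits(document_count: int, batch_limit: int,
                             space_size: int, space_limit: int) -> None:
        # raise, as the task does, so the caller can mark the documents as errored
        if document_count > batch_limit:
            raise ValueError(f"You have reached the batch upload limit of {batch_limit}.")
        # a limit of 0 means unlimited; otherwise reject once usage has reached the limit
        if 0 < space_limit <= space_size:
            raise ValueError("Your documents plus this upload exceed the limit of your subscription.")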
+ 8 - 1
web/app/components/datasets/create/file-uploader/index.tsx

@@ -14,6 +14,8 @@ import { fetchSupportFileTypes } from '@/service/datasets'
 import I18n from '@/context/i18n'
 import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'
 
+const FILES_NUMBER_LIMIT = 20
+
 type IFileUploaderProps = {
   fileList: FileItem[]
   titleClassName?: string
@@ -176,6 +178,11 @@ const FileUploader = ({
     if (!files.length)
       return false
 
+    if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
+      return false
+    }
+
     const preparedFiles = files.map((file, index) => ({
       fileID: `file${index}-${Date.now()}`,
       file,
@@ -185,7 +192,7 @@ const FileUploader = ({
     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }, [prepareFileList, uploadMultipleFiles])
+  }, [prepareFileList, uploadMultipleFiles, notify, t, fileList])
 
   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()

+ 1 - 0
web/i18n/lang/dataset-creation.en.ts

@@ -28,6 +28,7 @@ const translation = {
         typeError: 'File type not supported',
         size: 'File too large. Maximum is {{size}}MB',
         count: 'Multiple files not supported',
+        filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
       },
       cancel: 'Cancel',
       change: 'Change',

+ 1 - 0
web/i18n/lang/dataset-creation.pt.ts

@@ -28,6 +28,7 @@ const translation = {
         typeError: 'Tipo de arquivo não suportado',
         size: 'Arquivo muito grande. Máximo é {{size}}MB',
         count: 'Vários arquivos não suportados',
+        filesNumber: 'Limite de upload em massa {{filesNumber}}.',
       },
       cancel: 'Cancelar',
       change: 'Alterar',

+ 1 - 0
web/i18n/lang/dataset-creation.uk.ts

@@ -28,6 +28,7 @@ const translation = {
         typeError: 'Тип файлу не підтримується',
         size: 'Файл занадто великий. Максимум – {{size}} МБ',
         count: 'Не підтримується завантаження кількох файлів',
+        filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
       },
       cancel: 'Скасувати',
       change: 'Змінити',

+ 1 - 0
web/i18n/lang/dataset-creation.zh.ts

@@ -28,6 +28,7 @@ const translation = {
         typeError: '文件类型不支持',
         size: '文件太大了,不能超过 {{size}}MB',
         count: '暂不支持多个文件',
+        filesNumber: '批量上传限制 {{filesNumber}}。',
       },
       cancel: '取消',
       change: '更改文件',