瀏覽代碼

feat: add GET upload file API endpoint to dataset service api (#11899)

eux 3 月之前
父節點
當前提交
20f090537f

+ 1 - 1
api/controllers/service_api/__init__.py

@@ -7,4 +7,4 @@ api = ExternalApi(bp)
 
 from . import index
 from .app import app, audio, completion, conversation, file, message, workflow
-from .dataset import dataset, document, hit_testing, segment
+from .dataset import dataset, document, hit_testing, segment, upload_file

+ 54 - 0
api/controllers/service_api/dataset/upload_file.py

@@ -0,0 +1,54 @@
+from werkzeug.exceptions import NotFound
+
+from controllers.service_api import api
+from controllers.service_api.wraps import (
+    DatasetApiResource,
+)
+from core.file import helpers as file_helpers
+from extensions.ext_database import db
+from models.dataset import Dataset
+from models.model import UploadFile
+from services.dataset_service import DocumentService
+
+
+class UploadFileApi(DatasetApiResource):
+    def get(self, tenant_id, dataset_id, document_id):
+        """Get upload file."""
+        # check dataset
+        dataset_id = str(dataset_id)
+        tenant_id = str(tenant_id)
+        dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
+        if not dataset:
+            raise NotFound("Dataset not found.")
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset.id, document_id)
+        if not document:
+            raise NotFound("Document not found.")
+        # check upload file
+        if document.data_source_type != "upload_file":
+            raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
+        data_source_info = document.data_source_info_dict
+        if data_source_info and "upload_file_id" in data_source_info:
+            file_id = data_source_info["upload_file_id"]
+            upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
+            if not upload_file:
+                raise NotFound("UploadFile not found.")
+        else:
+            raise ValueError("Upload file id not found in document data source info.")
+
+        url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
+        return {
+            "id": upload_file.id,
+            "name": upload_file.name,
+            "size": upload_file.size,
+            "extension": upload_file.extension,
+            "url": url,
+            "download_url": f"{url}&as_attachment=true",
+            "mime_type": upload_file.mime_type,
+            "created_by": upload_file.created_by,
+            "created_at": upload_file.created_at.timestamp(),
+        }, 200
+
+
+api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")

+ 51 - 0
web/app/(commonLayout)/datasets/template/template.en.mdx

@@ -1106,6 +1106,57 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 
 <hr className='ml-0 mr-0' />
 
+<Heading
+  url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
+  method='GET'
+  title='Get Upload File'
+  name='#get_upload_file'
+/>
+<Row>
+  <Col>
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        Knowledge ID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        Document ID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "id": "file_id",
+      "name": "file_name",
+      "size": 1024,
+      "extension": "txt",
+      "url": "preview_url",
+      "download_url": "download_url",
+      "mime_type": "text/plain",
+      "created_by": "user_id",
+      "created_at": 1728734540
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+<hr className='ml-0 mr-0' />
+
 <Heading
   url='/datasets/{dataset_id}/retrieve'
   method='POST'

+ 51 - 0
web/app/(commonLayout)/datasets/template/template.zh.mdx

@@ -1107,6 +1107,57 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
 
 <hr className='ml-0 mr-0' />
 
+<Heading
+  url='/datasets/{dataset_id}/documents/{document_id}/upload-file'
+  method='GET'
+  title='获取上传文件'
+  name='#get_upload_file'
+/>
+<Row>
+  <Col>
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        知识库 ID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        文档 ID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/upload-file"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/upload-file' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "id": "file_id",
+      "name": "file_name",
+      "size": 1024,
+      "extension": "txt",
+      "url": "preview_url",
+      "download_url": "download_url",
+      "mime_type": "text/plain",
+      "created_by": "user_id",
+      "created_at": 1728734540
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+<hr className='ml-0 mr-0' />
+
 <Heading
   url='/datasets/{dataset_id}/retrieve'
   method='POST'