
feat: integrate opendal storage (#11508)

Signed-off-by: -LAN- <laipz8200@outlook.com>
-LAN- 4 months ago
parent
commit
8d4bb9b40d

+ 22 - 5
api/.env.example

@@ -56,20 +56,36 @@ DB_DATABASE=dify
 
 # Storage configuration
 # used to store uploaded files, private keys...
-# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
+STORAGE_TYPE=opendal
+
+# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
+
+# S3 Storage configuration
 S3_USE_AWS_MANAGED_IAM=false
 S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
 S3_BUCKET_NAME=your-bucket-name
 S3_ACCESS_KEY=your-access-key
 S3_SECRET_KEY=your-secret-key
 S3_REGION=your-region
+
 # Azure Blob Storage configuration
 AZURE_BLOB_ACCOUNT_NAME=your-account-name
 AZURE_BLOB_ACCOUNT_KEY=your-account-key
 AZURE_BLOB_CONTAINER_NAME=your-container-name
 AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
+
 # Aliyun oss Storage configuration
 ALIYUN_OSS_BUCKET_NAME=your-bucket-name
 ALIYUN_OSS_ACCESS_KEY=your-access-key
@@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
 ALIYUN_OSS_REGION=your-region
 # Don't start with '/'. OSS doesn't support leading slash in object names.
 ALIYUN_OSS_PATH=your-path
+
 # Google Storage configuration
 GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
 GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
 WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 
-
-# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
+# Vector database configuration
+# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
 VECTOR_STORE=weaviate
 
 # Weaviate configuration

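With STORAGE_TYPE=opendal and STORAGE_OPENDAL_SCHEME=fs, uploads land under OPENDAL_FS_ROOT. A minimal sketch of the operator these variables configure, mirroring the calls this commit makes (the object path is illustrative):

import opendal

# fs scheme: object paths resolve relative to the root directory,
# i.e. the equivalent of OPENDAL_FS_ROOT=storage.
op = opendal.Operator(scheme="fs", root="storage")
op.write(path="upload/example.txt", bs=b"hello")  # lands at storage/upload/example.txt
data = op.read(path="upload/example.txt")         # -> b"hello"
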
+ 53 - 37
api/configs/middleware/__init__.py

@@ -1,54 +1,69 @@
-from typing import Any, Optional
+from typing import Any, Literal, Optional
 from urllib.parse import quote_plus
 
 from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
 from pydantic_settings import BaseSettings
 
-from configs.middleware.cache.redis_config import RedisConfig
-from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
-from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
-from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
-from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
-from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
-from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
-from configs.middleware.storage.oci_storage_config import OCIStorageConfig
-from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
-from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
-from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
-from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
-from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
-from configs.middleware.vdb.chroma_config import ChromaConfig
-from configs.middleware.vdb.couchbase_config import CouchbaseConfig
-from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
-from configs.middleware.vdb.lindorm_config import LindormConfig
-from configs.middleware.vdb.milvus_config import MilvusConfig
-from configs.middleware.vdb.myscale_config import MyScaleConfig
-from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
-from configs.middleware.vdb.opensearch_config import OpenSearchConfig
-from configs.middleware.vdb.oracle_config import OracleConfig
-from configs.middleware.vdb.pgvector_config import PGVectorConfig
-from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
-from configs.middleware.vdb.qdrant_config import QdrantConfig
-from configs.middleware.vdb.relyt_config import RelytConfig
-from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
-from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
-from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
-from configs.middleware.vdb.upstash_config import UpstashConfig
-from configs.middleware.vdb.vikingdb_config import VikingDBConfig
-from configs.middleware.vdb.weaviate_config import WeaviateConfig
+from .cache.redis_config import RedisConfig
+from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
+from .storage.amazon_s3_storage_config import S3StorageConfig
+from .storage.azure_blob_storage_config import AzureBlobStorageConfig
+from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
+from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
+from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
+from .storage.oci_storage_config import OCIStorageConfig
+from .storage.opendal_storage_config import OpenDALStorageConfig
+from .storage.supabase_storage_config import SupabaseStorageConfig
+from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
+from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
+from .vdb.analyticdb_config import AnalyticdbConfig
+from .vdb.baidu_vector_config import BaiduVectorDBConfig
+from .vdb.chroma_config import ChromaConfig
+from .vdb.couchbase_config import CouchbaseConfig
+from .vdb.elasticsearch_config import ElasticsearchConfig
+from .vdb.lindorm_config import LindormConfig
+from .vdb.milvus_config import MilvusConfig
+from .vdb.myscale_config import MyScaleConfig
+from .vdb.oceanbase_config import OceanBaseVectorConfig
+from .vdb.opensearch_config import OpenSearchConfig
+from .vdb.oracle_config import OracleConfig
+from .vdb.pgvector_config import PGVectorConfig
+from .vdb.pgvectors_config import PGVectoRSConfig
+from .vdb.qdrant_config import QdrantConfig
+from .vdb.relyt_config import RelytConfig
+from .vdb.tencent_vector_config import TencentVectorDBConfig
+from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
+from .vdb.tidb_vector_config import TiDBVectorConfig
+from .vdb.upstash_config import UpstashConfig
+from .vdb.vikingdb_config import VikingDBConfig
+from .vdb.weaviate_config import WeaviateConfig
 
 
 class StorageConfig(BaseSettings):
-    STORAGE_TYPE: str = Field(
+    STORAGE_TYPE: Literal[
+        "opendal",
+        "s3",
+        "aliyun-oss",
+        "azure-blob",
+        "baidu-obs",
+        "google-storage",
+        "huawei-obs",
+        "oci-storage",
+        "tencent-cos",
+        "volcengine-tos",
+        "supabase",
+        "local",
+    ] = Field(
         description="Type of storage to use."
-        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
-        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
-        default="local",
+        " Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
+        "'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
+        default="opendal",
     )
 
     STORAGE_LOCAL_PATH: str = Field(
         description="Path for local storage when STORAGE_TYPE is set to 'local'.",
         default="storage",
+        deprecated=True,
     )
 
 
@@ -235,6 +250,7 @@ class MiddlewareConfig(
     GoogleCloudStorageConfig,
     HuaweiCloudOBSStorageConfig,
     OCIStorageConfig,
+    OpenDALStorageConfig,
     S3StorageConfig,
     SupabaseStorageConfig,
     TencentCloudCOSStorageConfig,

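Because STORAGE_TYPE is now a Literal rather than a plain str, an unsupported value fails at settings-load time instead of silently falling back to local storage. A sketch of that behavior ("ftp" is a deliberately invalid example):

from pydantic import ValidationError

from configs.middleware import StorageConfig

try:
    StorageConfig(STORAGE_TYPE="ftp")  # not in the Literal set
except ValidationError as exc:
    print(exc)  # the error lists the permitted values
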
+ 51 - 0
api/configs/middleware/storage/opendal_storage_config.py

@@ -0,0 +1,51 @@
+from enum import StrEnum
+from typing import Literal
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class OpenDALScheme(StrEnum):
+    FS = "fs"
+    S3 = "s3"
+
+
+class OpenDALStorageConfig(BaseSettings):
+    STORAGE_OPENDAL_SCHEME: str = Field(
+        default=OpenDALScheme.FS.value,
+        description="OpenDAL scheme.",
+    )
+    # FS
+    OPENDAL_FS_ROOT: str = Field(
+        default="storage",
+        description="Root path for local storage.",
+    )
+    # S3
+    OPENDAL_S3_ROOT: str = Field(
+        default="/",
+        description="Root path for S3 storage.",
+    )
+    OPENDAL_S3_BUCKET: str = Field(
+        default="",
+        description="S3 bucket name.",
+    )
+    OPENDAL_S3_ENDPOINT: str = Field(
+        default="https://s3.amazonaws.com",
+        description="S3 endpoint URL.",
+    )
+    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
+        default="",
+        description="S3 access key ID.",
+    )
+    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
+        default="",
+        description="S3 secret access key.",
+    )
+    OPENDAL_S3_REGION: str = Field(
+        default="",
+        description="S3 region.",
+    )
+    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
+        default="",
+        description="S3 server-side encryption.",
+    )

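Since OpenDALStorageConfig is a pydantic-settings BaseSettings, each field name doubles as its environment variable, and unset fields keep the defaults above. A small sketch of that resolution:

import os

from configs.middleware.storage.opendal_storage_config import OpenDALStorageConfig

os.environ["STORAGE_OPENDAL_SCHEME"] = "s3"
os.environ["OPENDAL_S3_BUCKET"] = "my-bucket"

config = OpenDALStorageConfig()
assert config.STORAGE_OPENDAL_SCHEME == "s3"
assert config.OPENDAL_S3_BUCKET == "my-bucket"
assert config.OPENDAL_S3_ENDPOINT == "https://s3.amazonaws.com"  # default kept
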
+ 0 - 1
api/docker/entrypoint.sh

@@ -34,7 +34,6 @@ else
       --workers ${SERVER_WORKER_AMOUNT:-1} \
       --worker-class ${SERVER_WORKER_CLASS:-gevent} \
       --timeout ${GUNICORN_TIMEOUT:-200} \
-      --preload \
       app:app
   fi
 fi

+ 103 - 18
api/extensions/ext_storage.py

@@ -1,31 +1,43 @@
 import logging
-from collections.abc import Generator
+from collections.abc import Callable, Generator, Mapping
 from typing import Union
 
 from flask import Flask
 
 from configs import dify_config
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
 from dify_app import DifyApp
 from extensions.storage.base_storage import BaseStorage
 from extensions.storage.storage_type import StorageType
 
+logger = logging.getLogger(__name__)
 
-class Storage:
-    def __init__(self):
-        self.storage_runner = None
 
+class Storage:
     def init_app(self, app: Flask):
         storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
         with app.app_context():
             self.storage_runner = storage_factory()
 
     @staticmethod
-    def get_storage_factory(storage_type: str) -> type[BaseStorage]:
+    def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
         match storage_type:
             case StorageType.S3:
-                from extensions.storage.aws_s3_storage import AwsS3Storage
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                kwargs = _load_s3_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
+            case StorageType.OPENDAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
+                kwargs = _load_opendal_storage_kwargs(scheme)
+                return lambda: OpenDALStorage(scheme=scheme, **kwargs)
+            case StorageType.LOCAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
 
-                return AwsS3Storage
+                kwargs = _load_local_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
             case StorageType.AZURE_BLOB:
                 from extensions.storage.azure_blob_storage import AzureBlobStorage
 
@@ -62,16 +74,14 @@ class Storage:
                 from extensions.storage.supabase_storage import SupabaseStorage
 
                 return SupabaseStorage
-            case StorageType.LOCAL | _:
-                from extensions.storage.local_fs_storage import LocalFsStorage
-
-                return LocalFsStorage
+            case _:
+                raise ValueError(f"Unsupported storage type {storage_type}")
 
     def save(self, filename, data):
         try:
             self.storage_runner.save(filename, data)
         except Exception as e:
-            logging.exception(f"Failed to save file {filename}")
+            logger.exception(f"Failed to save file {filename}")
             raise e
 
     def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
@@ -81,45 +91,120 @@ class Storage:
             else:
                 return self.load_once(filename)
         except Exception as e:
-            logging.exception(f"Failed to load file {filename}")
+            logger.exception(f"Failed to load file {filename}")
             raise e
 
     def load_once(self, filename: str) -> bytes:
         try:
             return self.storage_runner.load_once(filename)
         except Exception as e:
-            logging.exception(f"Failed to load_once file {filename}")
+            logger.exception(f"Failed to load_once file {filename}")
             raise e
 
     def load_stream(self, filename: str) -> Generator:
         try:
             return self.storage_runner.load_stream(filename)
         except Exception as e:
-            logging.exception(f"Failed to load_stream file {filename}")
+            logger.exception(f"Failed to load_stream file {filename}")
             raise e
 
     def download(self, filename, target_filepath):
         try:
             self.storage_runner.download(filename, target_filepath)
         except Exception as e:
-            logging.exception(f"Failed to download file {filename}")
+            logger.exception(f"Failed to download file {filename}")
             raise e
 
     def exists(self, filename):
         try:
             return self.storage_runner.exists(filename)
         except Exception as e:
-            logging.exception(f"Failed to check file exists {filename}")
+            logger.exception(f"Failed to check file exists {filename}")
             raise e
 
     def delete(self, filename):
         try:
             return self.storage_runner.delete(filename)
         except Exception as e:
-            logging.exception(f"Failed to delete file {filename}")
+            logger.exception(f"Failed to delete file {filename}")
             raise e
 
 
+def _load_s3_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for S3 storage based on dify_config.
+    Handles special cases like AWS managed IAM and R2.
+    """
+    kwargs = {
+        "root": "/",
+        "bucket": dify_config.S3_BUCKET_NAME,
+        "endpoint": dify_config.S3_ENDPOINT,
+        "access_key_id": dify_config.S3_ACCESS_KEY,
+        "secret_access_key": dify_config.S3_SECRET_KEY,
+        "region": dify_config.S3_REGION,
+    }
+    kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}
+
+    # For AWS managed IAM
+    if dify_config.S3_USE_AWS_MANAGED_IAM:
+        from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS
+
+        logger.debug("Using AWS managed IAM role for S3")
+        kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}
+
+    # For Cloudflare R2
+    if kwargs.get("endpoint"):
+        from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+        if is_r2_endpoint(kwargs["endpoint"]):
+            logger.debug("Using R2 for OpenDAL S3")
+            kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+
+    return kwargs
+
+
+def _load_local_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for local storage based on dify_config.
+    """
+    return {
+        "root": dify_config.STORAGE_LOCAL_PATH,
+    }
+
+
+def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
+    """
+    Load the kwargs for OpenDAL storage based on the given scheme.
+    """
+    match scheme:
+        case OpenDALScheme.FS:
+            kwargs = {
+                "root": dify_config.OPENDAL_FS_ROOT,
+            }
+        case OpenDALScheme.S3:
+            # Load OpenDAL S3-related configs
+            kwargs = {
+                "root": dify_config.OPENDAL_S3_ROOT,
+                "bucket": dify_config.OPENDAL_S3_BUCKET,
+                "endpoint": dify_config.OPENDAL_S3_ENDPOINT,
+                "access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
+                "secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
+                "region": dify_config.OPENDAL_S3_REGION,
+            }
+
+            # For Cloudflare R2
+            if kwargs.get("endpoint"):
+                from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+                if is_r2_endpoint(kwargs["endpoint"]):
+                    logger.debug("Using R2 for OpenDAL S3")
+                    kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+        case _:
+            logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
+            kwargs = {}
+    return kwargs
+
+
 storage = Storage()
 
 

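The factory now returns a zero-argument callable instead of a class, so the s3, local, and opendal storage types can each bind kwargs resolved from dify_config up front and still construct the same OpenDALStorage backend. A sketch of that contract, assuming the default fs scheme:

from extensions.ext_storage import Storage

factory = Storage.get_storage_factory("opendal")  # kwargs resolved from dify_config
runner = factory()                                # an OpenDALStorage instance
runner.save("greeting.txt", b"hi")
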
+ 0 - 3
api/extensions/storage/base_storage.py

@@ -7,9 +7,6 @@ from collections.abc import Generator
 class BaseStorage(ABC):
     """Interface for file storage."""
 
-    def __init__(self):  # noqa: B027
-        pass
-
     @abstractmethod
     def save(self, filename, data):
         raise NotImplementedError

+ 0 - 62
api/extensions/storage/local_fs_storage.py

@@ -1,62 +0,0 @@
-import os
-import shutil
-from collections.abc import Generator
-from pathlib import Path
-
-from flask import current_app
-
-from configs import dify_config
-from extensions.storage.base_storage import BaseStorage
-
-
-class LocalFsStorage(BaseStorage):
-    """Implementation for local filesystem storage."""
-
-    def __init__(self):
-        super().__init__()
-        folder = dify_config.STORAGE_LOCAL_PATH
-        if not os.path.isabs(folder):
-            folder = os.path.join(current_app.root_path, folder)
-        self.folder = folder
-
-    def _build_filepath(self, filename: str) -> str:
-        """Build the full file path based on the folder and filename."""
-        if not self.folder or self.folder.endswith("/"):
-            return self.folder + filename
-        else:
-            return self.folder + "/" + filename
-
-    def save(self, filename, data):
-        filepath = self._build_filepath(filename)
-        folder = os.path.dirname(filepath)
-        os.makedirs(folder, exist_ok=True)
-        Path(os.path.join(os.getcwd(), filepath)).write_bytes(data)
-
-    def load_once(self, filename: str) -> bytes:
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        return Path(filepath).read_bytes()
-
-    def load_stream(self, filename: str) -> Generator:
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        with open(filepath, "rb") as f:
-            while chunk := f.read(4096):  # Read in chunks of 4KB
-                yield chunk
-
-    def download(self, filename, target_filepath):
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        shutil.copyfile(filepath, target_filepath)
-
-    def exists(self, filename):
-        filepath = self._build_filepath(filename)
-        return os.path.exists(filepath)
-
-    def delete(self, filename):
-        filepath = self._build_filepath(filename)
-        if os.path.exists(filepath):
-            os.remove(filepath)

+ 66 - 0
api/extensions/storage/opendal_storage.py

@@ -0,0 +1,66 @@
+from collections.abc import Generator
+from pathlib import Path
+from urllib.parse import urlparse
+
+import opendal
+
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
+from extensions.storage.base_storage import BaseStorage
+
+S3_R2_HOSTNAME = "r2.cloudflarestorage.com"
+S3_R2_COMPATIBLE_KWARGS = {
+    "delete_max_size": "700",
+    "disable_stat_with_override": "true",
+    "region": "auto",
+}
+S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
+    "server_side_encryption": "aws:kms",
+}
+
+
+def is_r2_endpoint(endpoint: str) -> bool:
+    if not endpoint:
+        return False
+
+    parsed_url = urlparse(endpoint)
+    return bool(parsed_url.hostname and parsed_url.hostname.endswith(S3_R2_HOSTNAME))
+
+
+class OpenDALStorage(BaseStorage):
+    def __init__(self, scheme: OpenDALScheme, **kwargs):
+        if scheme == OpenDALScheme.FS:
+            Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)
+
+        self.op = opendal.Operator(scheme=scheme, **kwargs)
+
+    def save(self, filename: str, data: bytes) -> None:
+        self.op.write(path=filename, bs=data)
+
+    def load_once(self, filename: str) -> bytes:
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        return self.op.read(path=filename)
+
+    def load_stream(self, filename: str) -> Generator:
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        batch_size = 4096
+        file = self.op.open(path=filename, mode="rb")
+        while chunk := file.read(batch_size):
+            yield chunk
+
+    def download(self, filename: str, target_filepath: str):
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        with Path(target_filepath).open("wb") as f:
+            f.write(self.op.read(path=filename))
+
+    def exists(self, filename: str):
+        return self.op.stat(path=filename).mode.is_file()
+
+    def delete(self, filename: str):
+        if self.exists(filename):
+            self.op.delete(path=filename)

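A minimal usage sketch of the new backend over the local filesystem; "example-root" is an illustrative directory, created on init for the fs scheme:

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage

storage = OpenDALStorage(scheme=OpenDALScheme.FS, root="example-root")
storage.save("notes/a.txt", b"hello opendal")
assert storage.load_once("notes/a.txt") == b"hello opendal"
for chunk in storage.load_stream("notes/a.txt"):  # yielded in 4096-byte chunks
    print(len(chunk))
storage.delete("notes/a.txt")
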
+ 1 - 0
api/extensions/storage/storage_type.py

@@ -9,6 +9,7 @@ class StorageType(StrEnum):
     HUAWEI_OBS = "huawei-obs"
     LOCAL = "local"
     OCI_STORAGE = "oci-storage"
+    OPENDAL = "opendal"
     S3 = "s3"
     TENCENT_COS = "tencent-cos"
     VOLCENGINE_TOS = "volcengine-tos"

These changes were not shown because the diff is too large
+ 398 - 402
api/poetry.lock


+ 1 - 0
api/pyproject.toml

@@ -134,6 +134,7 @@ bce-python-sdk = "~0.9.23"
 cos-python-sdk-v5 = "1.9.30"
 esdk-obs-python = "3.24.6.1"
 google-cloud-storage = "2.16.0"
+opendal = "~0.45.12"
 oss2 = "2.18.5"
 supabase = "~2.8.1"
 tos = "~2.7.1"

+ 20 - 0
api/tests/unit_tests/configs/test_opendal_config_parse.py

@@ -0,0 +1,20 @@
+import pytest
+
+from extensions.storage.opendal_storage import is_r2_endpoint
+
+
+@pytest.mark.parametrize(
+    ("endpoint", "expected"),
+    [
+        ("https://bucket.r2.cloudflarestorage.com", True),
+        ("https://custom-domain.r2.cloudflarestorage.com/", True),
+        ("https://bucket.r2.cloudflarestorage.com/path", True),
+        ("https://s3.amazonaws.com", False),
+        ("https://storage.googleapis.com", False),
+        ("http://localhost:9000", False),
+        ("invalid-url", False),
+        ("", False),
+    ],
+)
+def test_is_r2_endpoint(endpoint: str, expected: bool):
+    assert is_r2_endpoint(endpoint) == expected

+ 4 - 4
api/tests/unit_tests/oss/__mock/base.py

@@ -6,7 +6,7 @@ from extensions.storage.base_storage import BaseStorage
 
 
 def get_example_folder() -> str:
-    return "/dify"
+    return "~/dify"
 
 
 def get_example_bucket() -> str:
@@ -22,14 +22,14 @@ def get_example_data() -> bytes:
 
 
 def get_example_filepath() -> str:
-    return "/test"
+    return "~/test"
 
 
 class BaseStorageTest:
     @pytest.fixture(autouse=True)
-    def setup_method(self):
+    def setup_method(self, *args, **kwargs):
         """Should be implemented in child classes to setup specific storage."""
-        self.storage = BaseStorage()
+        self.storage: BaseStorage
 
     def test_save(self):
         """Test saving data."""

+ 0 - 18
api/tests/unit_tests/oss/local/test_local_fs.py

@@ -1,18 +0,0 @@
-from collections.abc import Generator
-
-import pytest
-
-from extensions.storage.local_fs_storage import LocalFsStorage
-from tests.unit_tests.oss.__mock.base import (
-    BaseStorageTest,
-    get_example_folder,
-)
-from tests.unit_tests.oss.__mock.local import setup_local_fs_mock
-
-
-class TestLocalFS(BaseStorageTest):
-    @pytest.fixture(autouse=True)
-    def setup_method(self, setup_local_fs_mock):
-        """Executed before each test method."""
-        self.storage = LocalFsStorage()
-        self.storage.folder = get_example_folder()

+ 0 - 0
api/tests/unit_tests/oss/local/__init__.py → api/tests/unit_tests/oss/opendal/__init__.py


+ 19 - 0
api/tests/unit_tests/oss/opendal/test_opendal.py

@@ -0,0 +1,19 @@
+import pytest
+
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
+from extensions.storage.opendal_storage import OpenDALStorage
+from tests.unit_tests.oss.__mock.base import (
+    BaseStorageTest,
+    get_example_folder,
+)
+from tests.unit_tests.oss.__mock.local import setup_local_fs_mock
+
+
+class TestOpenDAL(BaseStorageTest):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, *args, **kwargs):
+        """Executed before each test method."""
+        self.storage = OpenDALStorage(
+            scheme=OpenDALScheme.FS,
+            root=get_example_folder(),
+        )

+ 17 - 4
docker/.env.example

@@ -281,10 +281,23 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
 # ------------------------------
 
 # The type of storage to use for storing user files.
-# Supported values are `local` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
-# Default: `local`
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# Supported values are `opendal` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
+# Default: `opendal`
+STORAGE_TYPE=opendal
+
+# Apache OpenDAL Configuration, refer to https://github.com/apache/opendal
+# The scheme for the OpenDAL storage.
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
 
 # S3 Configuration
 # Whether to use AWS managed IAM roles for authenticating with the S3 service.

Some files were not shown because too many files changed in this diff