
feat: integrate opendal storage (#11508)

Signed-off-by: -LAN- <laipz8200@outlook.com>
-LAN- 4 months ago
parent commit 8d4bb9b40d

+ 22 - 5
api/.env.example

@@ -56,20 +56,36 @@ DB_DATABASE=dify
 
 # Storage configuration
 # use for store upload files, private keys...
-# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
+STORAGE_TYPE=opendal
+
+# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
+
+# S3 Storage configuration
 S3_USE_AWS_MANAGED_IAM=false
 S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
 S3_BUCKET_NAME=your-bucket-name
 S3_ACCESS_KEY=your-access-key
 S3_SECRET_KEY=your-secret-key
 S3_REGION=your-region
+
 # Azure Blob Storage configuration
 AZURE_BLOB_ACCOUNT_NAME=your-account-name
 AZURE_BLOB_ACCOUNT_KEY=your-account-key
 AZURE_BLOB_CONTAINER_NAME=your-container-name
 AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net
+
 # Aliyun oss Storage configuration
 ALIYUN_OSS_BUCKET_NAME=your-bucket-name
 ALIYUN_OSS_ACCESS_KEY=your-access-key
@@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
 ALIYUN_OSS_REGION=your-region
 # Don't start with '/'. OSS doesn't support leading slash in object names.
 ALIYUN_OSS_PATH=your-path
+
 # Google Storage configuration
 GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
 GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
 WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 
-
-# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
+# Vector database configuration
+# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
 VECTOR_STORE=weaviate
 
 # Weaviate configuration
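
For context on the new OPENDAL_* block above: with the default fs scheme, those values are passed to the OpenDAL operator more or less verbatim. A minimal sketch of what that amounts to, assuming the opendal Python binding is installed; the filename is purely illustrative and the root mirrors OPENDAL_FS_ROOT:

    import opendal

    # STORAGE_OPENDAL_SCHEME=fs, OPENDAL_FS_ROOT=storage
    op = opendal.Operator("fs", root="storage")
    op.write("hello.txt", b"hello")   # written under ./storage/hello.txt
    print(op.read("hello.txt"))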

+ 53 - 37
api/configs/middleware/__init__.py

@@ -1,54 +1,69 @@
-from typing import Any, Optional
+from typing import Any, Literal, Optional
 from urllib.parse import quote_plus
 
 from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
 from pydantic_settings import BaseSettings
 
-from configs.middleware.cache.redis_config import RedisConfig
-from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
-from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
-from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
-from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
-from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
-from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
-from configs.middleware.storage.oci_storage_config import OCIStorageConfig
-from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
-from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
-from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
-from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
-from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
-from configs.middleware.vdb.chroma_config import ChromaConfig
-from configs.middleware.vdb.couchbase_config import CouchbaseConfig
-from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
-from configs.middleware.vdb.lindorm_config import LindormConfig
-from configs.middleware.vdb.milvus_config import MilvusConfig
-from configs.middleware.vdb.myscale_config import MyScaleConfig
-from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
-from configs.middleware.vdb.opensearch_config import OpenSearchConfig
-from configs.middleware.vdb.oracle_config import OracleConfig
-from configs.middleware.vdb.pgvector_config import PGVectorConfig
-from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
-from configs.middleware.vdb.qdrant_config import QdrantConfig
-from configs.middleware.vdb.relyt_config import RelytConfig
-from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
-from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
-from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
-from configs.middleware.vdb.upstash_config import UpstashConfig
-from configs.middleware.vdb.vikingdb_config import VikingDBConfig
-from configs.middleware.vdb.weaviate_config import WeaviateConfig
+from .cache.redis_config import RedisConfig
+from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
+from .storage.amazon_s3_storage_config import S3StorageConfig
+from .storage.azure_blob_storage_config import AzureBlobStorageConfig
+from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
+from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
+from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
+from .storage.oci_storage_config import OCIStorageConfig
+from .storage.opendal_storage_config import OpenDALStorageConfig
+from .storage.supabase_storage_config import SupabaseStorageConfig
+from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
+from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
+from .vdb.analyticdb_config import AnalyticdbConfig
+from .vdb.baidu_vector_config import BaiduVectorDBConfig
+from .vdb.chroma_config import ChromaConfig
+from .vdb.couchbase_config import CouchbaseConfig
+from .vdb.elasticsearch_config import ElasticsearchConfig
+from .vdb.lindorm_config import LindormConfig
+from .vdb.milvus_config import MilvusConfig
+from .vdb.myscale_config import MyScaleConfig
+from .vdb.oceanbase_config import OceanBaseVectorConfig
+from .vdb.opensearch_config import OpenSearchConfig
+from .vdb.oracle_config import OracleConfig
+from .vdb.pgvector_config import PGVectorConfig
+from .vdb.pgvectors_config import PGVectoRSConfig
+from .vdb.qdrant_config import QdrantConfig
+from .vdb.relyt_config import RelytConfig
+from .vdb.tencent_vector_config import TencentVectorDBConfig
+from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
+from .vdb.tidb_vector_config import TiDBVectorConfig
+from .vdb.upstash_config import UpstashConfig
+from .vdb.vikingdb_config import VikingDBConfig
+from .vdb.weaviate_config import WeaviateConfig
 
 
 class StorageConfig(BaseSettings):
-    STORAGE_TYPE: str = Field(
+    STORAGE_TYPE: Literal[
+        "opendal",
+        "s3",
+        "aliyun-oss",
+        "azure-blob",
+        "baidu-obs",
+        "google-storage",
+        "huawei-obs",
+        "oci-storage",
+        "tencent-cos",
+        "volcengine-tos",
+        "supabase",
+        "local",
+    ] = Field(
         description="Type of storage to use."
-        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
-        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
-        default="local",
+        " Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
+        "'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
+        default="opendal",
     )
 
     STORAGE_LOCAL_PATH: str = Field(
         description="Path for local storage when STORAGE_TYPE is set to 'local'.",
         default="storage",
+        deprecated=True,
     )
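
Tightening STORAGE_TYPE from a plain str to a Literal means an unrecognized value is rejected when settings load, rather than silently falling back to local storage as before. A simplified, hypothetical stand-in for the class above (not Dify's real module) to show the effect:

    import os
    from typing import Literal

    from pydantic import Field, ValidationError
    from pydantic_settings import BaseSettings

    class StorageConfig(BaseSettings):
        STORAGE_TYPE: Literal["opendal", "s3", "local"] = Field(default="opendal")

    os.environ["STORAGE_TYPE"] = "opendal"
    print(StorageConfig().STORAGE_TYPE)   # opendal

    os.environ["STORAGE_TYPE"] = "ftp"    # not one of the allowed literals
    try:
        StorageConfig()
    except ValidationError as exc:
        print(f"rejected with {exc.error_count()} validation error")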
 
 
@@ -235,6 +250,7 @@ class MiddlewareConfig(
     GoogleCloudStorageConfig,
     HuaweiCloudOBSStorageConfig,
     OCIStorageConfig,
+    OpenDALStorageConfig,
     S3StorageConfig,
     SupabaseStorageConfig,
     TencentCloudCOSStorageConfig,

+ 51 - 0
api/configs/middleware/storage/opendal_storage_config.py

@@ -0,0 +1,51 @@
+from enum import StrEnum
+from typing import Literal
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class OpenDALScheme(StrEnum):
+    FS = "fs"
+    S3 = "s3"
+
+
+class OpenDALStorageConfig(BaseSettings):
+    STORAGE_OPENDAL_SCHEME: str = Field(
+        default=OpenDALScheme.FS.value,
+        description="OpenDAL scheme.",
+    )
+    # FS
+    OPENDAL_FS_ROOT: str = Field(
+        default="storage",
+        description="Root path for local storage.",
+    )
+    # S3
+    OPENDAL_S3_ROOT: str = Field(
+        default="/",
+        description="Root path for S3 storage.",
+    )
+    OPENDAL_S3_BUCKET: str = Field(
+        default="",
+        description="S3 bucket name.",
+    )
+    OPENDAL_S3_ENDPOINT: str = Field(
+        default="https://s3.amazonaws.com",
+        description="S3 endpoint URL.",
+    )
+    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
+        default="",
+        description="S3 access key ID.",
+    )
+    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
+        default="",
+        description="S3 secret access key.",
+    )
+    OPENDAL_S3_REGION: str = Field(
+        default="",
+        description="S3 region.",
+    )
+    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
+        default="",
+        description="S3 server-side encryption.",
+    )
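
A side note on OpenDALScheme: because it is a StrEnum, its members behave as ordinary strings, which is why the parsed scheme can be handed straight to the OpenDAL operator later on. A small sketch of that property (Python 3.11+):

    from enum import StrEnum

    class OpenDALScheme(StrEnum):
        FS = "fs"
        S3 = "s3"

    scheme = OpenDALScheme("fs")   # e.g. parsed from STORAGE_OPENDAL_SCHEME
    print(scheme == "fs")          # True: members compare equal to their string value
    print(f"scheme={scheme}")      # scheme=fs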

+ 0 - 1
api/docker/entrypoint.sh

@@ -34,7 +34,6 @@ else
       --workers ${SERVER_WORKER_AMOUNT:-1} \
       --worker-class ${SERVER_WORKER_CLASS:-gevent} \
       --timeout ${GUNICORN_TIMEOUT:-200} \
-      --preload \
       app:app
   fi
 fi

+ 103 - 18
api/extensions/ext_storage.py

@@ -1,31 +1,43 @@
 import logging
-from collections.abc import Generator
+from collections.abc import Callable, Generator, Mapping
 from typing import Union
 
 from flask import Flask
 
 from configs import dify_config
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
 from dify_app import DifyApp
 from extensions.storage.base_storage import BaseStorage
 from extensions.storage.storage_type import StorageType
 
+logger = logging.getLogger(__name__)
 
-class Storage:
-    def __init__(self):
-        self.storage_runner = None
 
+class Storage:
     def init_app(self, app: Flask):
         storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
         with app.app_context():
             self.storage_runner = storage_factory()
 
     @staticmethod
-    def get_storage_factory(storage_type: str) -> type[BaseStorage]:
+    def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
         match storage_type:
             case StorageType.S3:
-                from extensions.storage.aws_s3_storage import AwsS3Storage
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                kwargs = _load_s3_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
+            case StorageType.OPENDAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
+
+                scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
+                kwargs = _load_opendal_storage_kwargs(scheme)
+                return lambda: OpenDALStorage(scheme=scheme, **kwargs)
+            case StorageType.LOCAL:
+                from extensions.storage.opendal_storage import OpenDALStorage
 
-                return AwsS3Storage
+                kwargs = _load_local_storage_kwargs()
+                return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
             case StorageType.AZURE_BLOB:
                 from extensions.storage.azure_blob_storage import AzureBlobStorage
 
@@ -62,16 +74,14 @@ class Storage:
                 from extensions.storage.supabase_storage import SupabaseStorage
 
                 return SupabaseStorage
-            case StorageType.LOCAL | _:
-                from extensions.storage.local_fs_storage import LocalFsStorage
-
-                return LocalFsStorage
+            case _:
+                raise ValueError(f"Unsupported storage type {storage_type}")
 
     def save(self, filename, data):
         try:
             self.storage_runner.save(filename, data)
         except Exception as e:
-            logging.exception(f"Failed to save file {filename}")
+            logger.exception(f"Failed to save file {filename}")
             raise e
 
     def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
@@ -81,45 +91,120 @@ class Storage:
             else:
                 return self.load_once(filename)
         except Exception as e:
-            logging.exception(f"Failed to load file {filename}")
+            logger.exception(f"Failed to load file {filename}")
             raise e
 
     def load_once(self, filename: str) -> bytes:
         try:
             return self.storage_runner.load_once(filename)
         except Exception as e:
-            logging.exception(f"Failed to load_once file {filename}")
+            logger.exception(f"Failed to load_once file {filename}")
             raise e
 
     def load_stream(self, filename: str) -> Generator:
         try:
             return self.storage_runner.load_stream(filename)
         except Exception as e:
-            logging.exception(f"Failed to load_stream file {filename}")
+            logger.exception(f"Failed to load_stream file {filename}")
             raise e
 
     def download(self, filename, target_filepath):
         try:
             self.storage_runner.download(filename, target_filepath)
         except Exception as e:
-            logging.exception(f"Failed to download file {filename}")
+            logger.exception(f"Failed to download file {filename}")
             raise e
 
     def exists(self, filename):
         try:
             return self.storage_runner.exists(filename)
         except Exception as e:
-            logging.exception(f"Failed to check file exists {filename}")
+            logger.exception(f"Failed to check file exists {filename}")
             raise e
 
     def delete(self, filename):
         try:
             return self.storage_runner.delete(filename)
         except Exception as e:
-            logging.exception(f"Failed to delete file {filename}")
+            logger.exception(f"Failed to delete file {filename}")
             raise e
 
 
+def _load_s3_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for S3 storage based on dify_config.
+    Handles special cases like AWS managed IAM and R2.
+    """
+    kwargs = {
+        "root": "/",
+        "bucket": dify_config.S3_BUCKET_NAME,
+        "endpoint": dify_config.S3_ENDPOINT,
+        "access_key_id": dify_config.S3_ACCESS_KEY,
+        "secret_access_key": dify_config.S3_SECRET_KEY,
+        "region": dify_config.S3_REGION,
+    }
+    kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}
+
+    # For AWS managed IAM
+    if dify_config.S3_USE_AWS_MANAGED_IAM:
+        from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS
+
+        logger.debug("Using AWS managed IAM role for S3")
+        kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}
+
+    # For Cloudflare R2
+    if kwargs.get("endpoint"):
+        from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+        if is_r2_endpoint(kwargs["endpoint"]):
+            logger.debug("Using R2 for OpenDAL S3")
+            kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+
+    return kwargs
+
+
+def _load_local_storage_kwargs() -> Mapping[str, str]:
+    """
+    Load the kwargs for local storage based on dify_config.
+    """
+    return {
+        "root": dify_config.STORAGE_LOCAL_PATH,
+    }
+
+
+def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
+    """
+    Load the kwargs for OpenDAL storage based on the given scheme.
+    """
+    match scheme:
+        case OpenDALScheme.FS:
+            kwargs = {
+                "root": dify_config.OPENDAL_FS_ROOT,
+            }
+        case OpenDALScheme.S3:
+            # Load OpenDAL S3-related configs
+            kwargs = {
+                "root": dify_config.OPENDAL_S3_ROOT,
+                "bucket": dify_config.OPENDAL_S3_BUCKET,
+                "endpoint": dify_config.OPENDAL_S3_ENDPOINT,
+                "access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
+                "secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
+                "region": dify_config.OPENDAL_S3_REGION,
+            }
+
+            # For Cloudflare R2
+            if kwargs.get("endpoint"):
+                from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
+
+                if is_r2_endpoint(kwargs["endpoint"]):
+                    logger.debug("Using R2 for OpenDAL S3")
+                    kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
+        case _:
+            logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
+            kwargs = {}
+    return kwargs
+
+
 storage = Storage()
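
Callers keep using the same facade; only the backends behind get_storage_factory changed. A hedged usage sketch, assuming a configured Dify api environment; the Flask app and file names here are illustrative:

    from flask import Flask

    from extensions.ext_storage import storage

    app = Flask(__name__)
    storage.init_app(app)                     # backend chosen from dify_config.STORAGE_TYPE

    storage.save("docs/hello.txt", b"hello")  # 'opendal', 's3' and 'local' all route to OpenDALStorage
    data = storage.load("docs/hello.txt")     # bytes
    for chunk in storage.load("docs/hello.txt", stream=True):
        pass                                  # streamed via load_stream in 4 KB chunks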
 
 

+ 0 - 3
api/extensions/storage/base_storage.py

@@ -7,9 +7,6 @@ from collections.abc import Generator
 class BaseStorage(ABC):
     """Interface for file storage."""
 
-    def __init__(self):  # noqa: B027
-        pass
-
     @abstractmethod
     def save(self, filename, data):
         raise NotImplementedError

+ 0 - 62
api/extensions/storage/local_fs_storage.py

@@ -1,62 +0,0 @@
-import os
-import shutil
-from collections.abc import Generator
-from pathlib import Path
-
-from flask import current_app
-
-from configs import dify_config
-from extensions.storage.base_storage import BaseStorage
-
-
-class LocalFsStorage(BaseStorage):
-    """Implementation for local filesystem storage."""
-
-    def __init__(self):
-        super().__init__()
-        folder = dify_config.STORAGE_LOCAL_PATH
-        if not os.path.isabs(folder):
-            folder = os.path.join(current_app.root_path, folder)
-        self.folder = folder
-
-    def _build_filepath(self, filename: str) -> str:
-        """Build the full file path based on the folder and filename."""
-        if not self.folder or self.folder.endswith("/"):
-            return self.folder + filename
-        else:
-            return self.folder + "/" + filename
-
-    def save(self, filename, data):
-        filepath = self._build_filepath(filename)
-        folder = os.path.dirname(filepath)
-        os.makedirs(folder, exist_ok=True)
-        Path(os.path.join(os.getcwd(), filepath)).write_bytes(data)
-
-    def load_once(self, filename: str) -> bytes:
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        return Path(filepath).read_bytes()
-
-    def load_stream(self, filename: str) -> Generator:
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        with open(filepath, "rb") as f:
-            while chunk := f.read(4096):  # Read in chunks of 4KB
-                yield chunk
-
-    def download(self, filename, target_filepath):
-        filepath = self._build_filepath(filename)
-        if not os.path.exists(filepath):
-            raise FileNotFoundError("File not found")
-        shutil.copyfile(filepath, target_filepath)
-
-    def exists(self, filename):
-        filepath = self._build_filepath(filename)
-        return os.path.exists(filepath)
-
-    def delete(self, filename):
-        filepath = self._build_filepath(filename)
-        if os.path.exists(filepath):
-            os.remove(filepath)

+ 66 - 0
api/extensions/storage/opendal_storage.py

@@ -0,0 +1,66 @@
+from collections.abc import Generator
+from pathlib import Path
+from urllib.parse import urlparse
+
+import opendal
+
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
+from extensions.storage.base_storage import BaseStorage
+
+S3_R2_HOSTNAME = "r2.cloudflarestorage.com"
+S3_R2_COMPATIBLE_KWARGS = {
+    "delete_max_size": "700",
+    "disable_stat_with_override": "true",
+    "region": "auto",
+}
+S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
+    "server_side_encryption": "aws:kms",
+}
+
+
+def is_r2_endpoint(endpoint: str) -> bool:
+    if not endpoint:
+        return False
+
+    parsed_url = urlparse(endpoint)
+    return bool(parsed_url.hostname and parsed_url.hostname.endswith(S3_R2_HOSTNAME))
+
+
+class OpenDALStorage(BaseStorage):
+    def __init__(self, scheme: OpenDALScheme, **kwargs):
+        if scheme == OpenDALScheme.FS:
+            Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)
+
+        self.op = opendal.Operator(scheme=scheme, **kwargs)
+
+    def save(self, filename: str, data: bytes) -> None:
+        self.op.write(path=filename, bs=data)
+
+    def load_once(self, filename: str) -> bytes:
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        return self.op.read(path=filename)
+
+    def load_stream(self, filename: str) -> Generator:
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        batch_size = 4096
+        file = self.op.open(path=filename, mode="rb")
+        while chunk := file.read(batch_size):
+            yield chunk
+
+    def download(self, filename: str, target_filepath: str):
+        if not self.exists(filename):
+            raise FileNotFoundError("File not found")
+
+        with Path(target_filepath).open("wb") as f:
+            f.write(self.op.read(path=filename))
+
+    def exists(self, filename: str):
+        return self.op.stat(path=filename).mode.is_file()
+
+    def delete(self, filename: str):
+        if self.exists(filename):
+            self.op.delete(path=filename)
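
A quick, hedged sketch of the new class with the fs scheme, mirroring the unit test further down; the root directory is illustrative and is created on first use:

    from configs.middleware.storage.opendal_storage_config import OpenDALScheme
    from extensions.storage.opendal_storage import OpenDALStorage

    store = OpenDALStorage(scheme=OpenDALScheme.FS, root="/tmp/dify-opendal-example")
    store.save("folder/file.txt", b"content")
    print(store.exists("folder/file.txt"))                 # True
    print(store.load_once("folder/file.txt"))              # b'content'
    print(b"".join(store.load_stream("folder/file.txt")))  # streamed in 4 KB chunks
    store.delete("folder/file.txt")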

+ 1 - 0
api/extensions/storage/storage_type.py

@@ -9,6 +9,7 @@ class StorageType(StrEnum):
     HUAWEI_OBS = "huawei-obs"
     LOCAL = "local"
     OCI_STORAGE = "oci-storage"
+    OPENDAL = "opendal"
     S3 = "s3"
     TENCENT_COS = "tencent-cos"
     VOLCENGINE_TOS = "volcengine-tos"

File diff suppressed because it is too large
+ 398 - 402
api/poetry.lock


+ 1 - 0
api/pyproject.toml

@@ -134,6 +134,7 @@ bce-python-sdk = "~0.9.23"
 cos-python-sdk-v5 = "1.9.30"
 esdk-obs-python = "3.24.6.1"
 google-cloud-storage = "2.16.0"
+opendal = "~0.45.12"
 oss2 = "2.18.5"
 supabase = "~2.8.1"
 tos = "~2.7.1"

+ 20 - 0
api/tests/unit_tests/configs/test_opendal_config_parse.py

@@ -0,0 +1,20 @@
+import pytest
+
+from extensions.storage.opendal_storage import is_r2_endpoint
+
+
+@pytest.mark.parametrize(
+    ("endpoint", "expected"),
+    [
+        ("https://bucket.r2.cloudflarestorage.com", True),
+        ("https://custom-domain.r2.cloudflarestorage.com/", True),
+        ("https://bucket.r2.cloudflarestorage.com/path", True),
+        ("https://s3.amazonaws.com", False),
+        ("https://storage.googleapis.com", False),
+        ("http://localhost:9000", False),
+        ("invalid-url", False),
+        ("", False),
+    ],
+)
+def test_is_r2_endpoint(endpoint: str, expected: bool):
+    assert is_r2_endpoint(endpoint) == expected

+ 4 - 4
api/tests/unit_tests/oss/__mock/base.py

@@ -6,7 +6,7 @@ from extensions.storage.base_storage import BaseStorage
 
 
 def get_example_folder() -> str:
-    return "/dify"
+    return "~/dify"
 
 
 def get_example_bucket() -> str:
@@ -22,14 +22,14 @@ def get_example_data() -> bytes:
 
 
 def get_example_filepath() -> str:
-    return "/test"
+    return "~/test"
 
 
 class BaseStorageTest:
     @pytest.fixture(autouse=True)
-    def setup_method(self):
+    def setup_method(self, *args, **kwargs):
         """Should be implemented in child classes to setup specific storage."""
-        self.storage = BaseStorage()
+        self.storage: BaseStorage
 
     def test_save(self):
         """Test saving data."""

+ 0 - 18
api/tests/unit_tests/oss/local/test_local_fs.py

@@ -1,18 +0,0 @@
-from collections.abc import Generator
-
-import pytest
-
-from extensions.storage.local_fs_storage import LocalFsStorage
-from tests.unit_tests.oss.__mock.base import (
-    BaseStorageTest,
-    get_example_folder,
-)
-from tests.unit_tests.oss.__mock.local import setup_local_fs_mock
-
-
-class TestLocalFS(BaseStorageTest):
-    @pytest.fixture(autouse=True)
-    def setup_method(self, setup_local_fs_mock):
-        """Executed before each test method."""
-        self.storage = LocalFsStorage()
-        self.storage.folder = get_example_folder()

+ 0 - 0
api/tests/unit_tests/oss/local/__init__.py → api/tests/unit_tests/oss/opendal/__init__.py


+ 19 - 0
api/tests/unit_tests/oss/opendal/test_opendal.py

@@ -0,0 +1,19 @@
+import pytest
+
+from configs.middleware.storage.opendal_storage_config import OpenDALScheme
+from extensions.storage.opendal_storage import OpenDALStorage
+from tests.unit_tests.oss.__mock.base import (
+    BaseStorageTest,
+    get_example_folder,
+)
+from tests.unit_tests.oss.__mock.local import setup_local_fs_mock
+
+
+class TestOpenDAL(BaseStorageTest):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, *args, **kwargs):
+        """Executed before each test method."""
+        self.storage = OpenDALStorage(
+            scheme=OpenDALScheme.FS,
+            root=get_example_folder(),
+        )

+ 17 - 4
docker/.env.example

@@ -281,10 +281,23 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
 # ------------------------------
 
 # The type of storage to use for storing user files.
-# Supported values are `local` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
-# Default: `local`
-STORAGE_TYPE=local
-STORAGE_LOCAL_PATH=storage
+# Supported values are `opendal` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
+# Default: `opendal`
+STORAGE_TYPE=opendal
+
+# Apache OpenDAL Configuration, refer to https://github.com/apache/opendal
+# The scheme for the OpenDAL storage.
+STORAGE_OPENDAL_SCHEME=fs
+# OpenDAL FS
+OPENDAL_FS_ROOT=storage
+# OpenDAL S3
+OPENDAL_S3_ROOT=/
+OPENDAL_S3_BUCKET=your-bucket-name
+OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
+OPENDAL_S3_ACCESS_KEY_ID=your-access-key
+OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
+OPENDAL_S3_REGION=your-region
+OPENDAL_S3_SERVER_SIDE_ENCRYPTION=
 
 # S3 Configuration
 # Whether to use AWS managed IAM roles for authenticating with the S3 service.

Some files were not shown because too many files changed in this diff