5 months ago · c8ef9223e5
--- a/.github/workflows/api-tests.yml
+++ b/.github/workflows/api-tests.yml
@@ -78,7 +78,7 @@ jobs:
 
				       - name: Run Workflow
			
 
				         run: poetry run -C api bash dev/pytest/pytest_workflow.sh
			
 
				 
			
 
				-      - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch)
			
 
				+      - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
			
 
				         uses: hoverkraft-tech/compose-action@v2.0.0
			
 
				         with:
			
 
				           compose-file: |
			
@@ -86,6 +86,7 @@ jobs:
 
				           services: |
			
 
				             weaviate
			
 
				             qdrant
			
 
				+            couchbase-server
			
 
				             etcd
			
 
				             minio
			
 
				             milvus-standalone
			
--- a/.github/workflows/expose_service_ports.sh
+++ b/.github/workflows/expose_service_ports.sh
@@ -7,5 +7,7 @@ yq eval '.services["milvus-standalone"].ports += ["19530:19530"]' -i docker/dock
 
				 yq eval '.services.pgvector.ports += ["5433:5432"]' -i docker/docker-compose.yaml
			
 
				 yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compose.yaml
			
 
				 yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-compose.yaml
			
 
				+# Couchbase: publish the 8091-8096 port range and port 11210 of the couchbase-server service.
				+yq eval '.services["couchbase-server"].ports += ["8091-8096:8091-8096"]' -i docker/docker-compose.yaml
				+yq eval '.services["couchbase-server"].ports += ["11210:11210"]' -i docker/docker-compose.yaml
			
 
				 
			
 
				-echo "Ports exposed for sandbox, weaviate, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch"
			
 
				+echo "Ports exposed for sandbox, weaviate, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase"
			
--- a/.gitignore
+++ b/.gitignore
@@ -173,6 +173,7 @@ docker/volumes/myscale/log/*
 
				 docker/volumes/unstructured/*
			
 
				 docker/volumes/pgvector/data/*
			
 
				 docker/volumes/pgvecto_rs/data/*
			
 
				+docker/volumes/couchbase/*
			
 
				 
			
 
				 docker/nginx/conf.d/default.conf
			
 
				 docker/nginx/ssl/*
			
@@ -189,4 +190,4 @@ pyrightconfig.json
 
				 api/.vscode
			
 
				 
			
 
				 .idea/
			
 
				-.vscode
			
 
				+.vscode
			
--- a/api/.env.example
+++ b/api/.env.example
@@ -120,7 +120,7 @@ SUPABASE_URL=your-server-url
 
				 WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
			
 
				 CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
			
 
				 
			
 
				-# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, vikingdb, upstash
			
 
				+# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash
			
 
				 VECTOR_STORE=weaviate
			
 
				 
			
 
				 # Weaviate configuration
			
@@ -136,6 +136,13 @@ QDRANT_CLIENT_TIMEOUT=20
 
				 QDRANT_GRPC_ENABLED=false
			
 
				 QDRANT_GRPC_PORT=6334
			
 
				 
			
 
				+# Couchbase configuration
			
 
				+COUCHBASE_CONNECTION_STRING=127.0.0.1
			
 
				+COUCHBASE_USER=Administrator
			
 
				+COUCHBASE_PASSWORD=password
			
 
				+COUCHBASE_BUCKET_NAME=Embeddings
			
 
				+COUCHBASE_SCOPE_NAME=_default
			
 
				+
			
 
				 # Milvus configuration
			
 
				 MILVUS_URI=http://127.0.0.1:19530
			
 
				 MILVUS_TOKEN=
			
--- a/api/commands.py
+++ b/api/commands.py
@@ -278,6 +278,7 @@ def migrate_knowledge_vector_database():
 
				         VectorType.BAIDU,
			
 
				         VectorType.VIKINGDB,
			
 
				         VectorType.UPSTASH,
			
 
				+        VectorType.COUCHBASE,
			
 
				     }
			
 
				     page = 1
			
 
				     while True:
			
--- a/api/configs/middleware/__init__.py
+++ b/api/configs/middleware/__init__.py
@@ -17,6 +17,7 @@ from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCO
 
				 from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
			
 
				 from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
			
 
				 from configs.middleware.vdb.chroma_config import ChromaConfig
			
 
				+from configs.middleware.vdb.couchbase_config import CouchbaseConfig
			
 
				 from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
			
 
				 from configs.middleware.vdb.milvus_config import MilvusConfig
			
 
				 from configs.middleware.vdb.myscale_config import MyScaleConfig
			
@@ -251,6 +252,7 @@ class MiddlewareConfig(
 
				     TiDBVectorConfig,
			
 
				     WeaviateConfig,
			
 
				     ElasticsearchConfig,
			
 
				+    CouchbaseConfig,
			
 
				     InternalTestConfig,
			
 
				     VikingDBConfig,
			
 
				     UpstashConfig,
			
--- a/api/configs/middleware/vdb/couchbase_config.py
+++ b/api/configs/middleware/vdb/couchbase_config.py
@@ -0,0 +1,34 @@
 
				+from typing import Optional
			
 
				+
			
 
				+from pydantic import BaseModel, Field
			
 
				+
			
 
				+
			
 
				+class CouchbaseConfig(BaseModel):
				+    """
				+    Settings for the Couchbase vector store.
				+
				+    Every field is an optional string that defaults to None.
				+    """
				+
				+    # Address of the Couchbase cluster to connect to.
				+    COUCHBASE_CONNECTION_STRING: Optional[str] = Field(default=None, description="COUCHBASE connection string")
				+
				+    # Credentials used to authenticate against the cluster.
				+    COUCHBASE_USER: Optional[str] = Field(default=None, description="COUCHBASE user")
				+    COUCHBASE_PASSWORD: Optional[str] = Field(default=None, description="COUCHBASE password")
				+
				+    # Bucket and scope that hold the vector collections.
				+    COUCHBASE_BUCKET_NAME: Optional[str] = Field(default=None, description="COUCHBASE bucket name")
				+    COUCHBASE_SCOPE_NAME: Optional[str] = Field(default=None, description="COUCHBASE scope name")
			
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -640,6 +640,7 @@ class DatasetRetrievalSettingApi(Resource):
 
				                 | VectorType.ELASTICSEARCH
			
 
				                 | VectorType.PGVECTOR
			
 
				                 | VectorType.TIDB_ON_QDRANT
			
 
				+                | VectorType.COUCHBASE
			
 
				             ):
			
 
				                 return {
			
 
				                     "retrieval_method": [
			
@@ -678,6 +679,7 @@ class DatasetRetrievalSettingMockApi(Resource):
 
				                 | VectorType.MYSCALE
			
 
				                 | VectorType.ORACLE
			
 
				                 | VectorType.ELASTICSEARCH
			
 
				+                | VectorType.COUCHBASE
			
 
				                 | VectorType.PGVECTOR
			
 
				             ):
			
 
				                 return {
			
--- a/api/core/rag/datasource/vdb/couchbase/__init__.py
+++ b/api/core/rag/datasource/vdb/couchbase/__init__.py
--- a/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
+++ b/api/core/rag/datasource/vdb/couchbase/couchbase_vector.py
@@ -0,0 +1,378 @@
 
				+import json
			
 
				+import logging
			
 
				+import time
			
 
				+import uuid
			
 
				+from datetime import timedelta
			
 
				+from typing import Any
			
 
				+
			
 
				+from couchbase import search
			
 
				+from couchbase.auth import PasswordAuthenticator
			
 
				+from couchbase.cluster import Cluster
			
 
				+from couchbase.management.search import SearchIndex
			
 
				+
			
 
				+# needed for options -- cluster, timeout, SQL++ (N1QL) query, etc.
			
 
				+from couchbase.options import ClusterOptions, SearchOptions
			
 
				+from couchbase.vector_search import VectorQuery, VectorSearch
			
 
				+from flask import current_app
			
 
				+from pydantic import BaseModel, model_validator
			
 
				+
			
 
				+from core.rag.datasource.vdb.vector_base import BaseVector
			
 
				+from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
			
 
				+from core.rag.datasource.vdb.vector_type import VectorType
			
 
				+from core.rag.embedding.embedding_base import Embeddings
			
 
				+from core.rag.models.document import Document
			
 
				+from extensions.ext_redis import redis_client
			
 
				+from models.dataset import Dataset
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class CouchbaseConfig(BaseModel):
				+    """
				+    Connection settings needed to reach a Couchbase cluster.
				+
				+    All five fields are mandatory; `validate_config` rejects missing or empty
				+    values before the field types are validated.
				+    """
				+
				+    connection_string: str
				+    user: str
				+    password: str
				+    bucket_name: str
				+    scope_name: str
				+
				+    @model_validator(mode="before")
				+    @classmethod
				+    def validate_config(cls, values: dict) -> dict:
				+        """
				+        Ensure every connection setting is present and non-empty.
				+
				+        Raises:
				+            ValueError: naming the configuration key of the first missing setting.
				+        """
				+        # Field name -> configuration key reported in the error message.
				+        required_settings = {
				+            "connection_string": "COUCHBASE_CONNECTION_STRING",
				+            "user": "COUCHBASE_USER",
				+            "password": "COUCHBASE_PASSWORD",
				+            "bucket_name": "COUCHBASE_BUCKET_NAME",
				+            "scope_name": "COUCHBASE_SCOPE_NAME",
				+        }
				+        for field_name, setting_name in required_settings.items():
				+            if not values.get(field_name):
				+                raise ValueError(f"config {setting_name} is required")
				+        return values
			
 
				+
			
 
				+
			
 
				+class CouchbaseVector(BaseVector):
			
 
				+    def __init__(self, collection_name: str, config: CouchbaseConfig):
				+        """
				+        Connect to the Couchbase cluster described by `config`.
				+
				+        Keeps handles to the cluster, bucket and scope, then calls
				+        `wait_until_ready` with a five-second timeout.
				+        """
				+        super().__init__(collection_name)
				+        self._client_config = config
				+
				+        # Connect to couchbase
				+        credentials = PasswordAuthenticator(config.user, config.password)
				+        self._cluster = Cluster(config.connection_string, ClusterOptions(credentials))
				+        self._bucket = self._cluster.bucket(config.bucket_name)
				+        self._scope = self._bucket.scope(config.scope_name)
				+
				+        # Names are kept separately for building index definitions and queries.
				+        self._bucket_name = config.bucket_name
				+        self._scope_name = config.scope_name
				+
				+        # Wait until the cluster is ready for use.
				+        self._cluster.wait_until_ready(timedelta(seconds=5))
			
 
				+
			
 
				+    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
				+        """
				+        Prepare the collection and its search index, then store the documents.
				+
				+        The vector dimension is taken from the length of the first embedding.
				+        """
				+        # uuid4().hex is the dash-free rendering of the UUID string.
				+        index_uuid = uuid.uuid4().hex
				+        dimension = len(embeddings[0])
				+        self._create_collection(uuid=index_uuid, vector_length=dimension)
				+        self.add_texts(texts, embeddings)
			
 
				+
			
 
				+    def _create_collection(self, vector_length: int, uuid: str):
				+        """
				+        Create the collection and its `<collection>_search` index when they do not exist yet.
				+
				+        The work runs under a Redis lock keyed by the collection name. It is skipped
				+        when the Redis marker key is set or the collection is already present.
				+
				+        Args:
				+            vector_length: value written to "dims" of the "embedding" vector field.
				+            uuid: kept for interface compatibility. It is not submitted to Couchbase:
				+                only the index name, its params and the source bucket are sent.
				+        """
				+        lock_name = "vector_indexing_lock_{}".format(self._collection_name)
				+        with redis_client.lock(lock_name, timeout=20):
				+            collection_exist_cache_key = "vector_indexing_{}".format(self._collection_name)
				+            if redis_client.get(collection_exist_cache_key):
				+                return
				+            if self._collection_exists(self._collection_name):
				+                return
				+
				+            manager = self._bucket.collections()
				+            manager.create_collection(self._client_config.scope_name, self._collection_name)
				+
				+            index_manager = self._scope.search_indexes()
				+
				+            # Field mapping applied to documents of this collection.
				+            collection_mapping = {
				+                "dynamic": True,
				+                "enabled": True,
				+                "properties": {
				+                    "embedding": {
				+                        "dynamic": False,
				+                        "enabled": True,
				+                        "fields": [
				+                            {
				+                                "dims": vector_length,
				+                                "index": True,
				+                                "name": "embedding",
				+                                "similarity": "dot_product",
				+                                "type": "vector",
				+                                "vector_index_optimized_for": "recall",
				+                            }
				+                        ],
				+                    },
				+                    "metadata": {
				+                        "dynamic": True,
				+                        "enabled": True,
				+                    },
				+                    "text": {
				+                        "dynamic": False,
				+                        "enabled": True,
				+                        "fields": [
				+                            {
				+                                "index": True,
				+                                "name": "text",
				+                                "store": True,
				+                                "type": "text",
				+                            }
				+                        ],
				+                    },
				+                },
				+            }
				+
				+            index_params = {
				+                "doc_config": {
				+                    "docid_prefix_delim": "",
				+                    "docid_regexp": "",
				+                    "mode": "scope.collection.type_field",
				+                    "type_field": "type",
				+                },
				+                "mapping": {
				+                    "analysis": {},
				+                    "default_analyzer": "standard",
				+                    "default_datetime_parser": "dateTimeOptional",
				+                    "default_field": "_all",
				+                    "default_mapping": {
				+                        "dynamic": True,
				+                        "enabled": True,
				+                    },
				+                    "default_type": "_default",
				+                    "docvalues_dynamic": False,
				+                    "index_dynamic": True,
				+                    "store_dynamic": True,
				+                    "type_field": "_type",
				+                    # The type mapping is keyed by "<scope>.<collection>".
				+                    "types": {self._scope_name + "." + self._collection_name: collection_mapping},
				+                },
				+                "store": {
				+                    "indexType": "scorch",
				+                    "segmentVersion": 16,
				+                },
				+            }
				+
				+            # NOTE(review): the pauses presumably give the new collection and index
				+            # time to become visible on the cluster — confirm before changing them.
				+            time.sleep(2)
				+            index_manager.upsert_index(
				+                SearchIndex(
				+                    self._collection_name + "_search",
				+                    params=index_params,
				+                    source_name=self._bucket_name,
				+                ),
				+            )
				+            time.sleep(1)
				+
				+            # Marker key that lets later calls skip this work for one hour.
				+            redis_client.set(collection_exist_cache_key, 1, ex=3600)
			
 
				+
			
 
				+    def _collection_exists(self, name: str):
				+        """
				+        Report whether a collection called `name` exists in the configured scope.
				+
				+        Returns:
				+            True when the configured scope lists a collection with that name.
				+            False otherwise, including when the configured scope is not found
				+            in the bucket.
				+        """
				+        for scope in self._bucket.collections().get_all_scopes():
				+            # Only the configured scope is relevant; other scopes are ignored.
				+            if scope.name == self._scope_name:
				+                return any(collection.name == name for collection in scope.collections)
				+        return False
			
 
				+
			
 
				+    def get_type(self) -> str:
				+        """Return the vector store type identifier for Couchbase."""
				+        store_type = VectorType.COUCHBASE
				+        return store_type
			
 
				+
			
 
				+    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
				+        """
				+        Upsert every document together with its embedding into the collection.
				+
				+        Each stored record has the keys "text", "embedding" and "metadata" and is
				+        written under the id that `_get_uuids` yields for the document.
				+
				+        Returns:
				+            A list of the ids produced by `_get_uuids`, in the same order.
				+        """
				+        uuids = self._get_uuids(documents)
				+
				+        # Resolve the collection handle once instead of once per document.
				+        collection = self._scope.collection(self._collection_name)
				+        for doc_id, document, vector in zip(uuids, documents, embeddings):
				+            record = {"text": document.page_content, "embedding": vector, "metadata": document.metadata}
				+            collection.upsert(doc_id, record)
				+
				+        return list(uuids)
			
 
				+
			
 
				+    def text_exists(self, id: str) -> bool:
				+        """
				+        Check whether a document stored under key `id` exists in the collection.
				+
				+        Returns:
				+            True when the count query reports at least one match, False otherwise
				+            (including when the query yields no rows).
				+        """
				+        # Bucket, scope and collection are all backtick-quoted so that names
				+        # containing characters such as "-" remain valid identifiers.
				+        keyspace = (
				+            f"`{self._client_config.bucket_name}`"
				+            f".`{self._client_config.scope_name}`"
				+            f".`{self._collection_name}`"
				+        )
				+        query = f"SELECT COUNT(1) AS count FROM {keyspace} WHERE META().id = $doc_id"
				+
				+        # The id is passed as a named parameter rather than interpolated into the statement.
				+        result = self._cluster.query(query, named_parameters={"doc_id": id}).execute()
				+        for row in result:
				+            return row["count"] > 0
				+        return False  # Return False if no rows are returned
			
 
				+
			
 
				+    def delete_by_ids(self, ids: list[str]) -> None:
				+        """
				+        Delete the documents whose keys are listed in `ids`.
				+
				+        Deletion is best-effort: a failure is logged with its traceback and not re-raised.
				+        """
				+        if not ids:
				+            # Nothing to delete, so skip the query entirely.
				+            return
				+
				+        # All keyspace components are backtick-quoted so names with special
				+        # characters such as "-" remain valid identifiers.
				+        keyspace = f"`{self._bucket_name}`.`{self._client_config.scope_name}`.`{self._collection_name}`"
				+        query = f"DELETE FROM {keyspace} WHERE META().id IN $doc_ids;"
				+        try:
				+            self._cluster.query(query, named_parameters={"doc_ids": ids}).execute()
				+        except Exception:
				+            logger.exception("Failed to delete documents by ids from collection %s", self._collection_name)
			
 
				+
			
 
				+    def delete_by_document_id(self, document_id: str):
				+        """
				+        Delete the document whose key equals `document_id`.
				+        """
				+        # NOTE(review): this matches META().id, the key documents are upserted under.
				+        # If `document_id` is meant to match a metadata field instead, this
				+        # filter would need to change — confirm against the callers.
				+        keyspace = (
				+            f"`{self._client_config.bucket_name}`"
				+            f".`{self._client_config.scope_name}`"
				+            f".`{self._collection_name}`"
				+        )
				+        # Backtick-quoting every component keeps names with characters such as "-" valid.
				+        query = f"DELETE FROM {keyspace} WHERE META().id = $doc_id;"
				+        self._cluster.query(query, named_parameters={"doc_id": document_id}).execute()
			
 
				+
			
 
				+    # def get_ids_by_metadata_field(self, key: str, value: str):
			
 
				+    #     query = f"""
			
 
				+    #         SELECT id FROM
			
 
				+    #         `{self._client_config.bucket_name}`.{self._client_config.scope_name}.{self._collection_name}
			
 
				+    #         WHERE `metadata.{key}` = $value;
			
 
				+    #         """
			
 
				+    #     result = self._cluster.query(query, named_parameters={'value':value})
			
 
				+    #     return [row['id'] for row in result.rows()]
			
 
				+
			
 
				+    def delete_by_metadata_field(self, key: str, value: str) -> None:
				+        """
				+        Delete every document whose `metadata.<key>` equals `value`.
				+
				+        `value` is bound as a named parameter. `key` is interpolated into the
				+        statement as a field path, so it must come from trusted code.
				+        """
				+        # Backtick-quoting every keyspace component keeps names with characters such as "-" valid.
				+        keyspace = (
				+            f"`{self._client_config.bucket_name}`"
				+            f".`{self._client_config.scope_name}`"
				+            f".`{self._collection_name}`"
				+        )
				+        query = f"DELETE FROM {keyspace} WHERE metadata.{key} = $value;"
				+        self._cluster.query(query, named_parameters={"value": value}).execute()
			
 
				+
			
 
				+    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
				+        """
				+        Run a vector query on the "embedding" field of the `<collection>_search` index.
				+
				+        Keyword Args:
				+            top_k: number of results requested (default 5).
				+            score_threshold: rows scoring below this value are dropped
				+                (a missing or falsy value is treated as 0.0).
				+
				+        Returns:
				+            Documents built from each row's "text" field, with the remaining
				+            fields and the row score stored as metadata.
				+
				+        Raises:
				+            ValueError: for any error raised while searching or reading rows;
				+                the original exception is attached as the cause.
				+        """
				+        top_k = kwargs.get("top_k", 5)
				+        score_threshold = kwargs.get("score_threshold") or 0.0
				+
				+        search_req = search.SearchRequest.create(
				+            VectorSearch.from_vector_query(VectorQuery("embedding", query_vector, top_k))
				+        )
				+        try:
				+            search_iter = self._scope.search(
				+                self._collection_name + "_search",
				+                search_req,
				+                SearchOptions(limit=top_k, collections=[self._collection_name], fields=["*"]),
				+            )
				+
				+            docs = []
				+            # Parse the results
				+            for row in search_iter.rows():
				+                text = row.fields.pop("text")
				+                metadata = self._format_metadata(row.fields)
				+                score = row.score
				+                metadata["score"] = score
				+                doc = Document(page_content=text, metadata=metadata)
				+                if score >= score_threshold:
				+                    docs.append(doc)
				+        except Exception as e:
				+            # Chain explicitly so the underlying error stays visible as the cause.
				+            raise ValueError(f"Search failed with error: {e}") from e
				+
				+        return docs
			
 
				+
			
 
				+    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
				+        """
				+        Run a query-string search restricted to the "text" field of the `<collection>_search` index.
				+
				+        Keyword Args:
				+            top_k: maximum number of results requested (default 2).
				+
				+        Returns:
				+            Documents built from each row's "text" field, with the remaining
				+            fields and the row score stored as metadata.
				+
				+        Raises:
				+            ValueError: for any error raised while searching or reading rows;
				+                the original exception is attached as the cause.
				+        """
				+        top_k = kwargs.get("top_k", 2)
				+        try:
				+            search_req = search.SearchRequest.create(search.QueryStringQuery("text:" + query))
				+            search_iter = self._scope.search(
				+                self._collection_name + "_search", search_req, SearchOptions(limit=top_k, fields=["*"])
				+            )
				+
				+            docs = []
				+            for row in search_iter.rows():
				+                text = row.fields.pop("text")
				+                metadata = self._format_metadata(row.fields)
				+                metadata["score"] = row.score
				+                docs.append(Document(page_content=text, metadata=metadata))
				+
				+        except Exception as e:
				+            # Chain explicitly so the underlying error stays visible as the cause.
				+            raise ValueError(f"Search failed with error: {e}") from e
				+
				+        return docs
			
 
				+
			
 
				+    def delete(self):
				+        """
				+        Drop this store's collection from the configured scope, if it exists there.
				+
				+        Collections with the same name in other scopes are left untouched.
				+        """
				+        manager = self._bucket.collections()
				+
				+        for scope in manager.get_all_scopes():
				+            if scope.name != self._scope_name:
				+                continue
				+            # Drop from the configured scope rather than a hard-coded "_default".
				+            if any(collection.name == self._collection_name for collection in scope.collections):
				+                manager.drop_collection(self._scope_name, self._collection_name)
				+            return
			
 
				+
			
 
				+    def _format_metadata(self, row_fields: dict[str, Any]) -> dict[str, Any]:
				+        """Helper method to format the metadata from the Couchbase Search API.
				+
				+        Keys that start with `metadata.` have that leading prefix removed; only the
				+        leading occurrence is stripped, so later "metadata." segments inside a key
				+        are preserved. All other keys are copied unchanged.
				+
				+        Args:
				+            row_fields (Dict[str, Any]): The fields to format.
				+
				+        Returns:
				+            Dict[str, Any]: The formatted metadata.
				+        """
				+        prefix = "metadata."
				+        return {
				+            (key[len(prefix) :] if key.startswith(prefix) else key): value
				+            for key, value in row_fields.items()
				+        }
			
 
				+
			
 
				+
			
 
				+class CouchbaseVectorFactory(AbstractVectorFactory):
				+    """Builds `CouchbaseVector` instances from the Flask application config."""
				+
				+    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> CouchbaseVector:
				+        """
				+        Create the Couchbase vector store for `dataset`.
				+
				+        The collection name is the dataset's stored "class_prefix" when an index
				+        struct exists. Otherwise it is generated from the dataset id and a new
				+        index struct is written to `dataset.index_struct`.
				+        """
				+        if not dataset.index_struct_dict:
				+            collection_name = Dataset.gen_collection_name_by_id(dataset.id)
				+            index_struct = self.gen_index_struct_dict(VectorType.COUCHBASE, collection_name)
				+            dataset.index_struct = json.dumps(index_struct)
				+        else:
				+            collection_name = dataset.index_struct_dict["vector_store"]["class_prefix"]
				+
				+        app_config = current_app.config
				+        couchbase_config = CouchbaseConfig(
				+            connection_string=app_config.get("COUCHBASE_CONNECTION_STRING"),
				+            user=app_config.get("COUCHBASE_USER"),
				+            password=app_config.get("COUCHBASE_PASSWORD"),
				+            bucket_name=app_config.get("COUCHBASE_BUCKET_NAME"),
				+            scope_name=app_config.get("COUCHBASE_SCOPE_NAME"),
				+        )
				+        return CouchbaseVector(collection_name=collection_name, config=couchbase_config)
			
--- a/api/core/rag/datasource/vdb/vector_factory.py
+++ b/api/core/rag/datasource/vdb/vector_factory.py
@@ -114,6 +114,10 @@ class Vector:
 
				                 from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
			
 
				 
			
 
				                 return AnalyticdbVectorFactory
			
 
				+            case VectorType.COUCHBASE:
			
 
				+                from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseVectorFactory
			
 
				+
			
 
				+                return CouchbaseVectorFactory
			
 
				             case VectorType.BAIDU:
			
 
				                 from core.rag.datasource.vdb.baidu.baidu_vector import BaiduVectorFactory
			
 
				 
			
--- a/api/core/rag/datasource/vdb/vector_type.py
+++ b/api/core/rag/datasource/vdb/vector_type.py
@@ -16,6 +16,7 @@ class VectorType(str, Enum):
 
				     TENCENT = "tencent"
			
 
				     ORACLE = "oracle"
			
 
				     ELASTICSEARCH = "elasticsearch"
			
 
				+    COUCHBASE = "couchbase"
			
 
				     BAIDU = "baidu"
			
 
				     VIKINGDB = "vikingdb"
			
 
				     UPSTASH = "upstash"
			
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -1801,6 +1801,46 @@ requests = ">=2.8"
 
				 six = "*"
			
 
				 xmltodict = "*"
			
 
				 
			
 
				+[[package]]
			
 
				+name = "couchbase"
			
 
				+version = "4.3.3"
			
 
				+description = "Python Client for Couchbase"
			
 
				+optional = false
			
 
				+python-versions = ">=3.7"
			
 
				+files = [
			
 
				+    {file = "couchbase-4.3.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:d8069e4f01332859d56cca597874645c914699162b3979d1b432f0dfc186b124"},
			
 
				+    {file = "couchbase-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1caa6cfef49c785b35b1702102f718227f351df87bba2694b9334520c41e9eb5"},
			
 
				+    {file = "couchbase-4.3.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f4a9a65c44935249fa078fb90a3c28ea71da9d2d5889fcd514b12d0538010ae0"},
			
 
				+    {file = "couchbase-4.3.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4f144b8c482c18283d8e419b844630d41f3249b07d43d40b5e3535444e57d0fb"},
			
 
				+    {file = "couchbase-4.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1c534fba6fdc7cf47eed9dee8a57d1e9eb867bf008574e321fa380a77cebf32f"},
			
 
				+    {file = "couchbase-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b841be06e0e4370b69ebef6bca3409c378186f7d6e964cd645ba18e97216c022"},
			
 
				+    {file = "couchbase-4.3.3-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:eee7a73b3acbdc78ae314fddf7f975b3c9e05df07df255f4dcc878939a2abae0"},
			
 
				+    {file = "couchbase-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:53417cafcf90ff4e2fd81ebba2a08b7ad56f17160d1c5019ad3b09c758aeb363"},
			
 
				+    {file = "couchbase-4.3.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0cefd13bea8b0f150f1b9d27fd7614f971f77419b31817781d26ba315ed658bb"},
			
 
				+    {file = "couchbase-4.3.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:78fa1054d7740e2fe38fce0a2aab4e9a2d30263d894e0615ee5df297f02f59a3"},
			
 
				+    {file = "couchbase-4.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb093899cfad5a7472258a9b6a57775dbf23a6e0180241507ba89ce3ab241e41"},
			
 
				+    {file = "couchbase-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f7cfbdc699af5715f49365ffbb05a6a7366a534c0d7161edf270ad3e735a6c5d"},
			
 
				+    {file = "couchbase-4.3.3-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:58352cae9b8affdaa2ac012e0a03c8c2632ee6297a878232888b4e0360d0d5df"},
			
 
				+    {file = "couchbase-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:728e7e3b5e1682706cb9d63993d289226d02a25089527b8ecb4e3889dabc38cf"},
			
 
				+    {file = "couchbase-4.3.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:73014bf098cf14187a39cc13453e0d859c1d54568df28f69cc308a9a5f24feb2"},
			
 
				+    {file = "couchbase-4.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a743375804068ae01b73c916bfca738764c8c12f381bb399ef04e784935856a1"},
			
 
				+    {file = "couchbase-4.3.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:394c122cfe02a76a99e7d5178e64129f6da49843225e78d8629abcab556c24af"},
			
 
				+    {file = "couchbase-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:bf85d7a5cda548d9801614651206068b4445fa37972e62b14d7521a958198693"},
			
 
				+    {file = "couchbase-4.3.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:92d23c9cedd571631070791f2afee0e3d7d8c9ce1bf2ea6e9a4f2fdbc37a0f1e"},
			
 
				+    {file = "couchbase-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:38c42eb29a73cce2998ae5df45bd61b16dce9765d3bff968ec5cf6a622faa291"},
			
 
				+    {file = "couchbase-4.3.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:afed137bf0edc642d7b201b6ab7b1e7117bb4c8eac6b2f253cc6e106f334a2a1"},
			
 
				+    {file = "couchbase-4.3.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:954d991377d47883aaf903934c5d0f19577680a2abf80d3ce5bb9b3c80991fc7"},
			
 
				+    {file = "couchbase-4.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5552b9fa684630698dc98d6f3b1082540634c1b7ad5bf53b843b5da57b0169c"},
			
 
				+    {file = "couchbase-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:f88f2b7e0c894f7237d9f3fb5c46abc44b8151a97b3ca8e75f57d23ebf59f9da"},
			
 
				+    {file = "couchbase-4.3.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:769e1e2367ea1d4de181fcd4b4e353e9abef97d15b581a6c5aea49ece3dc7d59"},
			
 
				+    {file = "couchbase-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:47f59a0b35ffce060583fd11f98f049f3b70701cf14aab9ac092594aca486aeb"},
			
 
				+    {file = "couchbase-4.3.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:440bb93d611827ba0ea2403c6f204fe931467a6cb5811f0e03bf1779204ef843"},
			
 
				+    {file = "couchbase-4.3.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cdb4dde62e1d41c0b8707121ab68fa78b7a1508541bd48fc850be396f91bc8d9"},
			
 
				+    {file = "couchbase-4.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7f8cf45f317b39cc19db5c67b565662f08d6c90305b3aa14e04bc22707258213"},
			
 
				+    {file = "couchbase-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:c97d48ad486c8f201b4482d5594258f949369cb44792ed148d5159a3d12ae21b"},
			
 
				+    {file = "couchbase-4.3.3.tar.gz", hash = "sha256:27808500551564b39b46943cf3daab572694889c1eb638425d363edb48b20da7"},
			
 
				+]
			
 
				+
			
 
				 [[package]]
			
 
				 name = "coverage"
			
 
				 version = "7.2.7"
			
@@ -6850,6 +6890,19 @@ files = [
 
				     {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"},
			
 
				     {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"},
			
 
				     {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"},
			
 
				+    {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"},
			
 
				+    {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"},
			
 
				+    {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"},
			
 
				+    {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"},
			
 
				+    {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"},
			
 
				+    {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"},
			
 
				+    {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
			
 
				 ]
			
 
				 
			
 
				 [package.dependencies]
			
@@ -10866,4 +10919,4 @@ cffi = ["cffi (>=1.11)"]
 
				 [metadata]
			
 
				 lock-version = "2.0"
			
 
				 python-versions = ">=3.10,<3.13"
			
 
				-content-hash = "1b268122d3d4771ba219f0e983322e0454b7b8644dba35da38d7d950d489e1ba"
			
 
				+content-hash = "52552faf5f4823056eb48afe05349ab2f0e9a5bc42105211ccbbb54b59e27b59"
			
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -239,6 +239,7 @@ alibabacloud_gpdb20160503 = "~3.8.0"
 
				 alibabacloud_tea_openapi = "~0.3.9"
			
 
				 chromadb = "0.5.1"
			
 
				 clickhouse-connect = "~0.7.16"
			
 
				+couchbase = "~4.3.0"
			
 
				 elasticsearch = "8.14.0"
			
 
				 opensearch-py = "2.4.0"
			
 
				 oracledb = "~2.2.1"
			
--- a/api/tests/integration_tests/vdb/couchbase/__init__.py
+++ b/api/tests/integration_tests/vdb/couchbase/__init__.py
--- a/api/tests/integration_tests/vdb/couchbase/test_couchbase.py
+++ b/api/tests/integration_tests/vdb/couchbase/test_couchbase.py
@@ -0,0 +1,50 @@
 
				+import subprocess
			
 
				+import time
			
 
				+
			
 
				+from core.rag.datasource.vdb.couchbase.couchbase_vector import CouchbaseConfig, CouchbaseVector
			
 
				+from tests.integration_tests.vdb.test_vector_store import (
			
 
				+    AbstractVectorTest,
			
 
				+    get_example_text,
			
 
				+    setup_mock_redis,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+def wait_for_healthy_container(service_name="couchbase-server", timeout=300):
			
 
				+    start_time = time.time()
			
 
				+    while time.time() - start_time < timeout:
			
 
				+        result = subprocess.run(
			
 
				+            ["docker", "inspect", "--format", "{{.State.Health.Status}}", service_name], capture_output=True, text=True
			
 
				+        )
			
 
				+        if result.stdout.strip() == "healthy":
			
 
				+            print(f"{service_name} is healthy!")
			
 
				+            return True
			
 
				+        else:
			
 
				+            print(f"Waiting for {service_name} to be healthy...")
			
 
				+        time.sleep(10)
			
 
				+    raise TimeoutError(f"{service_name} did not become healthy in time")
			
 
				+
			
 
				+
			
 
				+class CouchbaseTest(AbstractVectorTest):
			
 
				+    def __init__(self):
			
 
				+        super().__init__()
			
 
				+        self.vector = CouchbaseVector(
			
 
				+            collection_name=self.collection_name,
			
 
				+            config=CouchbaseConfig(
			
 
				+                connection_string="couchbase://127.0.0.1",
			
 
				+                user="Administrator",
			
 
				+                password="password",
			
 
				+                bucket_name="Embeddings",
			
 
				+                scope_name="_default",
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+    def search_by_vector(self):
			
 
				+        # brief sleep to ensure document is indexed
			
 
				+        time.sleep(5)
			
 
				+        hits_by_vector = self.vector.search_by_vector(query_vector=self.example_embedding)
			
 
				+        assert len(hits_by_vector) == 1
			
 
				+
			
 
				+
			
 
				+def test_couchbase(setup_mock_redis):
			
 
				+    wait_for_healthy_container("couchbase-server", timeout=60)
			
 
				+    CouchbaseTest().run_all_tests()
			
--- a/dev/pytest/pytest_vdb.sh
+++ b/dev/pytest/pytest_vdb.sh
@@ -11,4 +11,5 @@ pytest api/tests/integration_tests/vdb/chroma \
 
				   api/tests/integration_tests/vdb/vikingdb \
			
 
				   api/tests/integration_tests/vdb/baidu \
			
 
				   api/tests/integration_tests/vdb/tcvectordb \
			
 
				-  api/tests/integration_tests/vdb/upstash
			
 
				+  api/tests/integration_tests/vdb/upstash \
			
 
				+  api/tests/integration_tests/vdb/couchbase
			
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -375,7 +375,7 @@ SUPABASE_URL=your-server-url
 
				 # ------------------------------
			
 
				 
			
 
				 # The type of vector store to use.
			
 
				-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `vikingdb`.
			
 
				+# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `tidb_vector`, `oracle`, `tencent`, `elasticsearch`, `analyticdb`, `couchbase`, `vikingdb`.
			
 
				 VECTOR_STORE=weaviate
			
 
				 
			
 
				 # The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
			
@@ -414,6 +414,14 @@ MYSCALE_PASSWORD=
 
				 MYSCALE_DATABASE=dify
			
 
				 MYSCALE_FTS_PARAMS=
			
 
				 
			
 
				+# Couchbase configurations, only available when VECTOR_STORE is `couchbase`
			
 
				+# The connection string must include the hostname defined in the docker-compose file (couchbase-server in this case)
			
 
				+COUCHBASE_CONNECTION_STRING=couchbase://couchbase-server
			
 
				+COUCHBASE_USER=Administrator
			
 
				+COUCHBASE_PASSWORD=password
			
 
				+COUCHBASE_BUCKET_NAME=Embeddings
			
 
				+COUCHBASE_SCOPE_NAME=_default
			
 
				+
			
 
				 # pgvector configurations, only available when VECTOR_STORE is `pgvector`
			
 
				 PGVECTOR_HOST=pgvector
			
 
				 PGVECTOR_PORT=5432
			
--- a/docker/couchbase-server/Dockerfile
+++ b/docker/couchbase-server/Dockerfile
@@ -0,0 +1,4 @@
 
				+# Couchbase Server image extended with the cluster bootstrap script.
			
 
				+# NOTE(review): `latest` is a floating tag, so rebuilds may pull a different server version; consider pinning.
			
 
				+FROM couchbase/server:latest AS stage_base
			
 
				+# Disabled alternative base image: FROM couchbase:latest AS stage_base
			
 
				+# Add the init script to the image and mark it executable.
			
 
				+COPY init-cbserver.sh /opt/couchbase/init/
			
 
				+RUN chmod +x /opt/couchbase/init/init-cbserver.sh
			
--- a/docker/couchbase-server/init-cbserver.sh
+++ b/docker/couchbase-server/init-cbserver.sh
@@ -0,0 +1,44 @@
 
				+#!/bin/bash
			
 
				+# Starts Couchbase Server and, on the first run, initialises the cluster and the bucket.
			
 
				+# Docker Compose only runs one command per container, so this script starts the server
			
 
				+# the same way the standard Couchbase Dockerfile does and then performs the setup:
			
 
				+# https://github.com/couchbase/docker/blob/master/enterprise/couchbase-server/7.2.0/Dockerfile#L88
			
 
				+
			
 
				+/entrypoint.sh couchbase-server &
			
 
				+
			
 
				+# Marker file recording that setup already ran, so it is not attempted again.
			
 
				+FILE=/opt/couchbase/init/setupComplete.txt
			
 
				+
			
 
				+if ! [ -f "$FILE" ]; then
			
 
				+  # Create the cluster from environment variables.
			
 
				+  # https://docs.couchbase.com/server/current/cli/cbcli/couchbase-cli-cluster-init.html
			
 
				+
			
 
				+  # Log the username only: the password must never be written to the container logs.
			
 
				+  echo "Initialising Couchbase cluster as user: $COUCHBASE_ADMINISTRATOR_USERNAME"
			
 
				+
			
 
				+  # Give the server started above time to come up before issuing CLI commands.
			
 
				+  sleep 20s
			
 
				+  # All expansions are quoted so values containing spaces or glob characters stay single arguments.
			
 
				+  /opt/couchbase/bin/couchbase-cli cluster-init -c 127.0.0.1 \
			
 
				+    --cluster-username "$COUCHBASE_ADMINISTRATOR_USERNAME" \
			
 
				+    --cluster-password "$COUCHBASE_ADMINISTRATOR_PASSWORD" \
			
 
				+    --services data,index,query,fts \
			
 
				+    --cluster-ramsize "$COUCHBASE_RAM_SIZE" \
			
 
				+    --cluster-index-ramsize "$COUCHBASE_INDEX_RAM_SIZE" \
			
 
				+    --cluster-eventing-ramsize "$COUCHBASE_EVENTING_RAM_SIZE" \
			
 
				+    --cluster-fts-ramsize "$COUCHBASE_FTS_RAM_SIZE" \
			
 
				+    --index-storage-setting default
			
 
				+
			
 
				+  sleep 2s
			
 
				+
			
 
				+  # Create the bucket from environment variables.
			
 
				+  # https://docs.couchbase.com/server/current/cli/cbcli/couchbase-cli-bucket-create.html
			
 
				+
			
 
				+  /opt/couchbase/bin/couchbase-cli bucket-create -c localhost:8091 \
			
 
				+    --username "$COUCHBASE_ADMINISTRATOR_USERNAME" \
			
 
				+    --password "$COUCHBASE_ADMINISTRATOR_PASSWORD" \
			
 
				+    --bucket "$COUCHBASE_BUCKET" \
			
 
				+    --bucket-ramsize "$COUCHBASE_BUCKET_RAMSIZE" \
			
 
				+    --bucket-type couchbase
			
 
				+
			
 
				+  # Record that the cluster is set up so the steps above are skipped next time.
			
 
				+  touch "$FILE"
			
 
				+fi
			
 
				+
			
 
				+# Docker Compose stops the container once this script exits, so block forever
			
 
				+# (known issue and workaround).
			
 
				+tail -f /dev/null
			
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -110,6 +110,11 @@ x-shared-env: &shared-api-worker-env
 
				   QDRANT_CLIENT_TIMEOUT: ${QDRANT_CLIENT_TIMEOUT:-20}
			
 
				   QDRANT_GRPC_ENABLED: ${QDRANT_GRPC_ENABLED:-false}
			
 
				   QDRANT_GRPC_PORT: ${QDRANT_GRPC_PORT:-6334}
			
 
				+  COUCHBASE_CONNECTION_STRING: ${COUCHBASE_CONNECTION_STRING:-couchbase://couchbase-server}
			
 
				+  COUCHBASE_USER: ${COUCHBASE_USER:-Administrator}
			
 
				+  COUCHBASE_PASSWORD: ${COUCHBASE_PASSWORD:-password}
			
 
				+  COUCHBASE_BUCKET_NAME: ${COUCHBASE_BUCKET_NAME:-Embeddings}
			
 
				+  COUCHBASE_SCOPE_NAME: ${COUCHBASE_SCOPE_NAME:-_default}
			
 
				   MILVUS_URI: ${MILVUS_URI:-http://127.0.0.1:19530}
			
 
				   MILVUS_TOKEN: ${MILVUS_TOKEN:-}
			
 
				   MILVUS_USER: ${MILVUS_USER:-root}
			
@@ -475,6 +480,39 @@ services:
 
				     environment:
			
 
				       QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456}
			
 
				 
			
 
				+  # The Couchbase vector store.
			
 
				+  couchbase-server:
			
 
				+    build: ./couchbase-server
			
 
				+    profiles:
			
 
				+      - couchbase
			
 
				+    restart: always
			
 
				+    environment:
			
 
				+      - CLUSTER_NAME=dify_search
			
 
				+      - COUCHBASE_ADMINISTRATOR_USERNAME=${COUCHBASE_USER:-Administrator}
			
 
				+      - COUCHBASE_ADMINISTRATOR_PASSWORD=${COUCHBASE_PASSWORD:-password}
			
 
				+      - COUCHBASE_BUCKET=${COUCHBASE_BUCKET_NAME:-Embeddings}
			
 
				+      - COUCHBASE_BUCKET_RAMSIZE=512
			
 
				+      - COUCHBASE_RAM_SIZE=2048
			
 
				+      - COUCHBASE_EVENTING_RAM_SIZE=512
			
 
				+      - COUCHBASE_INDEX_RAM_SIZE=512
			
 
				+      - COUCHBASE_FTS_RAM_SIZE=1024
			
 
				+    hostname: couchbase-server
			
 
				+    container_name: couchbase-server
			
 
				+    working_dir: /opt/couchbase
			
 
				+    stdin_open: true
			
 
				+    tty: true
			
 
				+    entrypoint: [""]
			
 
				+    command: sh -c "/opt/couchbase/init/init-cbserver.sh"
			
 
				+    volumes:
			
 
				+      - ./volumes/couchbase/data:/opt/couchbase/var/lib/couchbase/data
			
 
				+    healthcheck:
			
 
				+      # Healthy once the buckets endpoint returns a non-empty list, i.e. the bucket was created.
			
 
				+      # Credentials come from the container environment; `$$` stops Compose from interpolating them.
			
 
				+      test: [ "CMD-SHELL", "curl -s -f -u \"$$COUCHBASE_ADMINISTRATOR_USERNAME:$$COUCHBASE_ADMINISTRATOR_PASSWORD\" http://localhost:8091/pools/default/buckets | grep -q '\\[{' || exit 1" ]
			
 
				+      interval: 10s
			
 
				+      retries: 10
			
 
				+      start_period: 30s
			
 
				+      timeout: 10s
			
 
				+
			
 
				   # The pgvector vector database.
			
 
				   pgvector:
			
 
				     image: pgvector/pgvector:pg16