Kaynağa Gözat

feat: add upstash as a new vector database provider (#9644)

Zven 6 ay önce
ebeveyn
işleme
8e7a752b2a

+ 5 - 1
api/.env.example

@@ -111,7 +111,7 @@ SUPABASE_URL=your-server-url
 WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 
-# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, vikingdb
+# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, vikingdb, upstash
 VECTOR_STORE=weaviate
 
 # Weaviate configuration
@@ -220,6 +220,10 @@ BAIDU_VECTOR_DB_DATABASE=dify
 BAIDU_VECTOR_DB_SHARD=1
 BAIDU_VECTOR_DB_REPLICAS=3
 
+# Upstash configuration
+UPSTASH_VECTOR_URL=your-server-url
+UPSTASH_VECTOR_TOKEN=your-access-token
+
 # ViKingDB configuration
 VIKINGDB_ACCESS_KEY=your-ak
 VIKINGDB_SECRET_KEY=your-sk

+ 1 - 0
api/commands.py

@@ -277,6 +277,7 @@ def migrate_knowledge_vector_database():
         VectorType.TENCENT,
         VectorType.BAIDU,
         VectorType.VIKINGDB,
+        VectorType.UPSTASH,
     }
     page = 1
     while True:

+ 2 - 0
api/configs/middleware/__init__.py

@@ -28,6 +28,7 @@ from configs.middleware.vdb.qdrant_config import QdrantConfig
 from configs.middleware.vdb.relyt_config import RelytConfig
 from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
 from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
+from configs.middleware.vdb.upstash_config import UpstashConfig
 from configs.middleware.vdb.vikingdb_config import VikingDBConfig
 from configs.middleware.vdb.weaviate_config import WeaviateConfig
 
@@ -246,5 +247,6 @@ class MiddlewareConfig(
     ElasticsearchConfig,
     InternalTestConfig,
     VikingDBConfig,
+    UpstashConfig,
 ):
     pass

+ 20 - 0
api/configs/middleware/vdb/upstash_config.py

@@ -0,0 +1,20 @@
+from typing import Optional
+
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+
+class UpstashConfig(BaseSettings):
+    """
+    Settings used to reach an Upstash vector database.
+
+    Both values are optional and default to None.
+    """
+
+    UPSTASH_VECTOR_URL: Optional[str] = Field(
+        default=None,
+        description="URL of the upstash server (e.g., 'https://vector.upstash.io')",
+    )
+
+    UPSTASH_VECTOR_TOKEN: Optional[str] = Field(
+        default=None,
+        description="Token for authenticating with the upstash server",
+    )

+ 2 - 0
api/controllers/console/datasets/datasets.py

@@ -630,6 +630,7 @@ class DatasetRetrievalSettingApi(Resource):
                 | VectorType.ORACLE
                 | VectorType.ELASTICSEARCH
                 | VectorType.PGVECTOR
+                | VectorType.UPSTASH
             ):
                 return {
                     "retrieval_method": [
@@ -668,6 +669,7 @@ class DatasetRetrievalSettingMockApi(Resource):
                 | VectorType.ORACLE
                 | VectorType.ELASTICSEARCH
                 | VectorType.PGVECTOR
+                | VectorType.UPSTASH
             ):
                 return {
                     "retrieval_method": [

+ 0 - 0
api/core/rag/datasource/vdb/upstash/__init__.py


+ 129 - 0
api/core/rag/datasource/vdb/upstash/upstash_vector.py

@@ -0,0 +1,129 @@
+import json
+from typing import Any
+from uuid import uuid4
+
+from pydantic import BaseModel, model_validator
+from upstash_vector import Index, Vector
+
+from configs import dify_config
+from core.rag.datasource.vdb.vector_base import BaseVector
+from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
+from core.rag.datasource.vdb.vector_type import VectorType
+from core.rag.embedding.embedding_base import Embeddings
+from core.rag.models.document import Document
+from models.dataset import Dataset
+
+
+class UpstashVectorConfig(BaseModel):
+    """
+    Connection parameters for an Upstash vector index.
+
+    Both ``url`` and ``token`` are required; validation raises ValueError
+    when either one is missing or empty.
+    """
+
+    url: str
+    token: str
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate_config(cls, values: dict) -> dict:
+        """Reject a missing or empty URL/token in the raw input values."""
+        # .get() so an absent key is reported with the same ValueError as an
+        # empty value instead of escaping as a bare KeyError.
+        if not values.get("url"):
+            raise ValueError("Upstash URL is required")
+        if not values.get("token"):
+            raise ValueError("Upstash Token is required")
+        return values
+
+
+class UpstashVector(BaseVector):
+    """
+    BaseVector implementation backed by an Upstash Vector index.
+
+    Every operation goes through one ``Index`` client built from the supplied
+    URL and token. ``collection_name`` is stored on the instance but is not
+    used to scope any of the index calls made in this class.
+    """
+
+    def __init__(self, collection_name: str, config: UpstashVectorConfig):
+        super().__init__(collection_name)
+        self._table_name = collection_name
+        self.index = Index(url=config.url, token=config.token)
+
+    def _get_index_dimension(self) -> int:
+        """Return the dimension reported by the index, or 1536 if none is reported."""
+        index_info = self.index.info()
+        if index_info and index_info.dimension:
+            return index_info.dimension
+        return 1536
+
+    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
+        """Store ``texts`` with their ``embeddings``; delegates to ``add_texts``."""
+        self.add_texts(texts, embeddings)
+
+    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
+        """
+        Upsert one vector per (document, embedding) pair.
+
+        Each vector gets a fresh random UUID as its id; the document's own
+        identifiers are only available through its metadata.
+        """
+        vectors = [
+            Vector(
+                id=str(uuid4()),
+                vector=embedding,
+                metadata=doc.metadata,
+                data=doc.page_content,
+            )
+            for doc, embedding in zip(documents, embeddings)
+        ]
+        self.index.upsert(vectors=vectors)
+
+    def text_exists(self, id: str) -> bool:
+        """Return True when at least one stored vector has ``doc_id`` metadata equal to ``id``."""
+        response = self.get_ids_by_metadata_field("doc_id", id)
+        return len(response) > 0
+
+    def delete_by_ids(self, ids: list[str]) -> None:
+        """
+        Delete every vector whose ``doc_id`` metadata matches one of ``ids``.
+
+        ``ids`` are document ids, not stored vector ids, so each one is first
+        resolved to vector ids through a metadata query.
+        """
+        item_ids: list[str] = []
+        for doc_id in ids:
+            # Separate name for the lookup result: the original rebound ``ids``
+            # mid-loop and guarded on the builtin ``id``, which is always truthy.
+            item_ids.extend(self.get_ids_by_metadata_field("doc_id", doc_id))
+        self._delete_by_ids(ids=item_ids)
+
+    def _delete_by_ids(self, ids: list[str]) -> None:
+        """Delete the given vector ids from the index; does nothing for an empty list."""
+        if ids:
+            self.index.delete(ids=ids)
+
+    def get_ids_by_metadata_field(self, key: str, value: str) -> list[str]:
+        """
+        Return the ids of vectors whose metadata field ``key`` equals ``value``.
+
+        The lookup is a filtered similarity query with ``top_k=1000``, so at
+        most 1000 ids are requested per call.
+        """
+        query_result = self.index.query(
+            # Placeholder vector sized to the index dimension; presumably only
+            # the filter matters here - TODO confirm.
+            vector=[1.001 * i for i in range(self._get_index_dimension())],
+            include_metadata=True,
+            top_k=1000,
+            # NOTE(review): ``value`` is interpolated unescaped; a value that
+            # contains a single quote would produce a malformed filter.
+            filter=f"{key} = '{value}'",
+        )
+        return [result.id for result in query_result]
+
+    def delete_by_metadata_field(self, key: str, value: str) -> None:
+        """Delete every vector whose metadata field ``key`` equals ``value``."""
+        ids = self.get_ids_by_metadata_field(key, value)
+        if ids:
+            self._delete_by_ids(ids)
+
+    def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
+        """
+        Return documents similar to ``query_vector``.
+
+        Reads ``top_k`` (default 4) and ``score_threshold`` (default 0.0) from
+        ``kwargs``. Only records scoring strictly above the threshold are
+        returned, each with its score stored under ``metadata["score"]``.
+        """
+        top_k = kwargs.get("top_k", 4)
+        result = self.index.query(vector=query_vector, top_k=top_k, include_metadata=True, include_data=True)
+        docs = []
+        score_threshold = float(kwargs.get("score_threshold") or 0.0)
+        for record in result:
+            # Fall back to an empty dict so a record without metadata cannot
+            # break the score assignment below.
+            metadata = record.metadata or {}
+            text = record.data
+            score = record.score
+            metadata["score"] = score
+            if score > score_threshold:
+                docs.append(Document(page_content=text, metadata=metadata))
+        return docs
+
+    def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
+        """Full-text search is not implemented for this store; always returns an empty list."""
+        return []
+
+    def delete(self) -> None:
+        """Reset the index."""
+        # NOTE(review): reset() is called with no collection filter - confirm
+        # that one index per dataset is the intended deployment.
+        self.index.reset()
+
+    def get_type(self) -> str:
+        """Return the vector store type identifier for Upstash."""
+        return VectorType.UPSTASH
+
+
+class UpstashVectorFactory(AbstractVectorFactory):
+    """Factory that builds an ``UpstashVector`` for a dataset."""
+
+    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> UpstashVector:
+        """
+        Create the Upstash-backed store for ``dataset``.
+
+        The collection name comes from the dataset's saved index struct when
+        there is one; otherwise it is generated from the dataset id and a new
+        index struct is written back onto the dataset.
+        """
+        if not dataset.index_struct_dict:
+            collection_name = Dataset.gen_collection_name_by_id(dataset.id)
+            index_struct = self.gen_index_struct_dict(VectorType.UPSTASH, collection_name)
+            dataset.index_struct = json.dumps(index_struct)
+        else:
+            saved_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
+            collection_name = saved_prefix.lower()
+
+        upstash_config = UpstashVectorConfig(
+            url=dify_config.UPSTASH_VECTOR_URL,
+            token=dify_config.UPSTASH_VECTOR_TOKEN,
+        )
+        return UpstashVector(collection_name=collection_name, config=upstash_config)

+ 4 - 0
api/core/rag/datasource/vdb/vector_factory.py

@@ -111,6 +111,10 @@ class Vector:
                 from core.rag.datasource.vdb.vikingdb.vikingdb_vector import VikingDBVectorFactory
 
                 return VikingDBVectorFactory
+            case VectorType.UPSTASH:
+                from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVectorFactory
+
+                return UpstashVectorFactory
             case _:
                 raise ValueError(f"Vector store {vector_type} is not supported.")
 

+ 1 - 0
api/core/rag/datasource/vdb/vector_type.py

@@ -18,3 +18,4 @@ class VectorType(str, Enum):
     ELASTICSEARCH = "elasticsearch"
     BAIDU = "baidu"
     VIKINGDB = "vikingdb"
+    UPSTASH = "upstash"

+ 58 - 2
api/poetry.lock

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -929,6 +929,10 @@ files = [
     {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"},
     {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"},
     {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"},
+    {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"},
     {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"},
     {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"},
     {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"},
@@ -941,8 +945,14 @@ files = [
     {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"},
     {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"},
     {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"},
+    {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"},
     {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"},
     {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"},
+    {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"},
     {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"},
     {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"},
     {file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"},
@@ -953,8 +963,24 @@ files = [
     {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"},
     {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"},
     {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"},
+    {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"},
     {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"},
     {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"},
+    {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"},
+    {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"},
+    {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"},
+    {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"},
+    {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"},
+    {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"},
     {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"},
     {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"},
     {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"},
@@ -964,6 +990,10 @@ files = [
     {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"},
     {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"},
     {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"},
+    {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"},
     {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"},
     {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"},
     {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"},
@@ -975,6 +1005,10 @@ files = [
     {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"},
     {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"},
     {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"},
+    {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"},
     {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"},
     {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"},
     {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"},
@@ -987,6 +1021,10 @@ files = [
     {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"},
     {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"},
     {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"},
+    {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"},
     {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"},
     {file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"},
     {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"},
@@ -999,6 +1037,10 @@ files = [
     {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"},
     {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"},
     {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"},
+    {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"},
     {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"},
     {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"},
     {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"},
@@ -9784,6 +9826,20 @@ tsv = ["pandas"]
 wikipedia = ["wikipedia"]
 xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
 
+[[package]]
+name = "upstash-vector"
+version = "0.6.0"
+description = "Serverless Vector SDK from Upstash"
+optional = false
+python-versions = "<4.0,>=3.8"
+files = [
+    {file = "upstash_vector-0.6.0-py3-none-any.whl", hash = "sha256:d0bdad7765b8a7f5c205b7a9c81ca4b9a4cee3ee4952afc7d5ea5fb76c3f3c3c"},
+    {file = "upstash_vector-0.6.0.tar.gz", hash = "sha256:a716ed4d0251362208518db8b194158a616d37d1ccbb1155f619df690599e39b"},
+]
+
+[package.dependencies]
+httpx = ">=0.23.0,<1"
+
 [[package]]
 name = "uritemplate"
 version = "4.1.1"
@@ -10796,4 +10852,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<3.13"
-content-hash = "5b102e3bc077ed730e9fb7be9015541111ffe7787888372d50a757aecb1d9eff"
+content-hash = "32fd52006f75e42fbc8f787e559a72f4e033383c73225231e4ecadabfec926f7"

+ 1 - 0
api/pyproject.toml

@@ -248,6 +248,7 @@ pymochow = "1.3.1"
 qdrant-client = "1.7.3"
 tcvectordb = "1.3.2"
 tidb-vector = "0.0.9"
+upstash-vector = "0.6.0"
 volcengine-compat = "~1.0.156"
 weaviate-client = "~3.21.0"
 

+ 75 - 0
api/tests/integration_tests/vdb/__mock/upstashvectordb.py

@@ -0,0 +1,75 @@
+import os
+from typing import Optional
+
+import pytest
+from _pytest.monkeypatch import MonkeyPatch
+from upstash_vector import Index
+
+
+# Mocking the Index class from upstash_vector
+class MockIndex:
+    """In-memory stand-in for ``upstash_vector.Index`` that keeps vectors in a list."""
+
+    def __init__(self, url="", token=""):
+        self.url, self.token = url, token
+        self.vectors = []
+
+    def upsert(self, vectors):
+        """Store each vector with a fixed score of 0.5 and report how many were given."""
+        for item in vectors:
+            item.score = 0.5
+            self.vectors.append(item)
+        return {"code": 0, "msg": "operation success", "affectedCount": len(vectors)}
+
+    def fetch(self, ids):
+        """Return the stored vectors whose id is in ``ids``."""
+        return list(filter(lambda item: item.id in ids, self.vectors))
+
+    def delete(self, ids):
+        """Drop the stored vectors whose id is in ``ids``."""
+        remaining = []
+        for item in self.vectors:
+            if item.id not in ids:
+                remaining.append(item)
+        self.vectors = remaining
+        return {"code": 0, "msg": "Success"}
+
+    def query(
+        self,
+        vector: None,
+        top_k: int = 10,
+        include_vectors: bool = False,
+        include_metadata: bool = False,
+        filter: str = "",
+        data: Optional[str] = None,
+        namespace: str = "",
+        include_data: bool = False,
+    ):
+        """Return the first ``top_k`` stored vectors; no similarity or filter is applied."""
+        return self.vectors[:top_k]
+
+    def reset(self):
+        """Forget every stored vector."""
+        self.vectors = []
+
+    def info(self):
+        """Report a fixed index dimension of 1024."""
+        return AttrDict(dimension=1024)
+
+
+class AttrDict(dict):
+    """Dict whose keys can also be read as attributes; unknown names give None."""
+
+    def __getattr__(self, item):
+        value = self.get(item, None)
+        return value
+
+
+MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
+
+
+@pytest.fixture
+def setup_upstashvector_mock(request, monkeypatch: MonkeyPatch):
+    """
+    Swap the ``Index`` methods for their ``MockIndex`` versions when MOCK is on.
+
+    The patches are undone after the test finishes.
+    """
+    if MOCK:
+        for method_name in ("__init__", "upsert", "fetch", "delete", "query", "reset", "info"):
+            monkeypatch.setattr(Index, method_name, getattr(MockIndex, method_name))
+
+    yield
+
+    if MOCK:
+        monkeypatch.undo()

+ 0 - 0
api/tests/integration_tests/vdb/upstash/__init__.py


+ 63 - 0
api/tests/integration_tests/vdb/upstash/test_upstash_vector.py

@@ -0,0 +1,63 @@
+import time
+import uuid
+
+from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVector, UpstashVectorConfig
+from core.rag.models.document import Document
+from tests.integration_tests.vdb.__mock.upstashvectordb import setup_upstashvector_mock
+from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest
+
+
+def get_example_text() -> str:
+    """Return the fixed page content used by the example documents."""
+    example_text = "test_text"
+    return example_text
+
+
+def get_example_document(doc_id: str) -> Document:
+    """Build an example Document whose id-like metadata fields all equal ``doc_id``."""
+    metadata = {field: doc_id for field in ("doc_id", "doc_hash", "document_id", "dataset_id")}
+    return Document(page_content=get_example_text(), metadata=metadata)
+
+
+class UpstashVectorTest(AbstractVectorTest):
+    """Runs the shared vector-store test steps against ``UpstashVector``."""
+
+    def __init__(self):
+        super().__init__()
+        config = UpstashVectorConfig(
+            url="your-server-url",
+            token="your-access-token",
+        )
+        self.vector = UpstashVector(collection_name="test_collection", config=config)
+        # Size the example embedding to whatever dimension the index reports.
+        dimension = self.vector._get_index_dimension()
+        self.example_embedding = [1.001 * i for i in range(dimension)]
+
+    def add_texts(self) -> list[str]:
+        """Insert one freshly generated example document and return its doc_id."""
+        batch_size = 1
+        documents = []
+        for _ in range(batch_size):
+            documents.append(get_example_document(doc_id=str(uuid.uuid4())))
+        embeddings = [self.example_embedding] * batch_size
+        self.vector.add_texts(documents=documents, embeddings=embeddings)
+        return [document.metadata["doc_id"] for document in documents]
+
+    def get_ids_by_metadata_field(self):
+        """Check that a lookup by ``document_id`` finds at least one vector."""
+        print("doc_id", self.example_doc_id)
+        found_ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
+        assert len(found_ids) > 0
+
+    def run_all_tests(self):
+        """Run the create, search, lookup and delete steps in order."""
+        self.create_vector()
+        time.sleep(1)
+        self.search_by_vector()
+        self.text_exists()
+        self.get_ids_by_metadata_field()
+        added_doc_ids = self.add_texts()
+        self.delete_by_ids([*added_doc_ids, self.example_doc_id])
+        self.delete_vector()
+
+
+def test_upstash_vector(setup_upstashvector_mock):
+    """Run the full Upstash vector test sequence under the mock fixture."""
+    suite = UpstashVectorTest()
+    suite.run_all_tests()

+ 2 - 1
dev/pytest/pytest_vdb.sh

@@ -10,4 +10,5 @@ pytest api/tests/integration_tests/vdb/chroma \
   api/tests/integration_tests/vdb/elasticsearch \
   api/tests/integration_tests/vdb/vikingdb \
   api/tests/integration_tests/vdb/baidu \
-  api/tests/integration_tests/vdb/tcvectordb
+  api/tests/integration_tests/vdb/tcvectordb \
+  api/tests/integration_tests/vdb/upstash

+ 2 - 0
docker/docker-compose.yaml

@@ -182,6 +182,8 @@ x-shared-env: &shared-api-worker-env
   VIKINGDB_REGION: ${VIKINGDB_REGION:-cn-shanghai}
   VIKINGDB_HOST: ${VIKINGDB_HOST:-api-vikingdb.xxx.volces.com}
   VIKINGDB_SCHEMA: ${VIKINGDB_SCHEMA:-http}
+  UPSTASH_VECTOR_URL: ${UPSTASH_VECTOR_URL:-https://xxx-vector.upstash.io}
+  UPSTASH_VECTOR_TOKEN: ${UPSTASH_VECTOR_TOKEN:-dify}
   UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15}
   UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5}
   ETL_TYPE: ${ETL_TYPE:-dify}