Procházet zdrojové kódy

fix weaviate hybrid search issue (#1600)

Co-authored-by: jyong <jyong@dify.ai>
Jyong před 1 rokem
rodič
revize
b930716745

+ 1 - 1
api/core/index/vector_index/weaviate_vector_index.py

@@ -111,7 +111,7 @@ class WeaviateVectorIndex(BaseVectorIndex):
         if self._vector_store:
             return self._vector_store
 
-        attributes = ['doc_id', 'dataset_id', 'document_id']
+        attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash']
         if self._is_origin():
             attributes = ['doc_id']
 

+ 6 - 5
api/core/vector_store/vector/weaviate.py

@@ -60,7 +60,7 @@ def _create_weaviate_client(**kwargs: Any) -> Any:
 
 
 def _default_score_normalizer(val: float) -> float:
-    return 1 - 1 / (1 + np.exp(val))
+    return 1 - val
 
 
 def _json_serializable(value: Any) -> Any:
@@ -243,7 +243,8 @@ class Weaviate(VectorStore):
             query_obj = query_obj.with_where(kwargs.get("where_filter"))
         if kwargs.get("additional"):
             query_obj = query_obj.with_additional(kwargs.get("additional"))
-        result = query_obj.with_bm25(query=content).with_limit(k).do()
+        properties = ['text', 'dataset_id', 'doc_hash', 'doc_id', 'document_id']
+        result = query_obj.with_bm25(query=query, properties=properties).with_limit(k).do()
         if "errors" in result:
             raise ValueError(f"Error during query: {result['errors']}")
         docs = []
@@ -380,14 +381,14 @@ class Weaviate(VectorStore):
             result = (
                 query_obj.with_near_vector(vector)
                 .with_limit(k)
-                .with_additional("vector")
+                .with_additional(["vector", "distance"])
                 .do()
             )
         else:
             result = (
                 query_obj.with_near_text(content)
                 .with_limit(k)
-                .with_additional("vector")
+                .with_additional(["vector", "distance"])
                 .do()
             )
 
@@ -397,7 +398,7 @@ class Weaviate(VectorStore):
         docs_and_scores = []
         for res in result["data"]["Get"][self._index_name]:
             text = res.pop(self._text_key)
-            score = np.dot(res["_additional"]["vector"], embedded_query)
+            score = res["_additional"]["distance"]
             docs_and_scores.append((Document(page_content=text, metadata=res), score))
         return docs_and_scores
 

+ 1 - 1
api/core/vector_store/weaviate_vector_store.py

@@ -1,4 +1,4 @@
-from langchain.vectorstores import Weaviate
+from core.vector_store.vector.weaviate import Weaviate
 
 
 class WeaviateVectorStore(Weaviate):

+ 2 - 2
docker/docker-compose.middleware.yaml

@@ -30,7 +30,7 @@ services:
 
   # The Weaviate vector store.
   weaviate:
-    image: semitechnologies/weaviate:1.18.4
+    image: semitechnologies/weaviate:1.19.0
     restart: always
     volumes:
       # Mount the Weaviate data directory to the container.
@@ -63,4 +63,4 @@ services:
 #    environment:
 #      QDRANT__API_KEY: 'difyai123456'
 #    ports:
-#      - "6333:6333"
+#      - "6333:6333"

+ 1 - 1
docker/docker-compose.yaml

@@ -253,7 +253,7 @@ services:
 
   # The Weaviate vector store.
   weaviate:
-    image: semitechnologies/weaviate:1.18.4
+    image: semitechnologies/weaviate:1.19.0
     restart: always
     volumes:
       # Mount the Weaviate data directory to the container.