Преглед на файлове

Fix the issue of repeated escaping of quotes in hit test (#13477)

liuzhenghua преди 2 месеца
родител
ревизия
47a64610ca
променени са 2 файла, в които са добавени 4 реда и са изтрити 3 реда
  1. +3 -2
      api/core/rag/datasource/retrieval_service.py
  2. +1 -1
      api/services/hit_testing_service.py

+ 3 - 2
api/core/rag/datasource/retrieval_service.py

@@ -1,3 +1,4 @@
+import json
 import threading
 from typing import Optional
 
@@ -171,7 +172,7 @@ class RetrievalService:
                 vector = Vector(dataset=dataset)
 
                 documents = vector.search_by_vector(
-                    cls.escape_query_for_search(query),
+                    query,
                     search_type="similarity_score_threshold",
                     top_k=top_k,
                     score_threshold=score_threshold,
@@ -250,7 +251,7 @@ class RetrievalService:
 
     @staticmethod
     def escape_query_for_search(query: str) -> str:
-        return query.replace('"', '\\"')
+        return json.dumps(query).strip('"')
 
     @staticmethod
     def format_retrieval_documents(documents: list[Document]) -> list[RetrievalSegments]:

+ 1 - 1
api/services/hit_testing_service.py

@@ -47,7 +47,7 @@ class HitTestingService:
         all_documents = RetrievalService.retrieve(
             retrieval_method=retrieval_model.get("search_method", "semantic_search"),
             dataset_id=dataset.id,
-            query=cls.escape_query_for_search(query),
+            query=query,
             top_k=retrieval_model.get("top_k", 2),
             score_threshold=retrieval_model.get("score_threshold", 0.0)
             if retrieval_model["score_threshold_enabled"]