소스 검색

feat: add pgvector full_text_search (#7396)

Byeongjin Kang 8 달 전
부모
커밋
0223fc6fd5
2개의 변경된 파일, 21개의 추가 및 6개의 삭제
  1. 21 2
      api/core/rag/datasource/vdb/pgvector/pgvector.py
  2. 0 4
      api/tests/integration_tests/vdb/pgvector/test_pgvector.py

+ 21 - 2
api/core/rag/datasource/vdb/pgvector/pgvector.py

@@ -152,8 +152,27 @@ class PGVector(BaseVector):
         return docs
 
     def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        # do not support bm25 search
-        return []
+        top_k = kwargs.get("top_k", 5)
+
+        with self._get_cursor() as cur:
+            cur.execute(
+                f"""SELECT meta, text, ts_rank(to_tsvector(coalesce(text, '')), to_tsquery(%s)) AS score
+                FROM {self.table_name}
+                WHERE to_tsvector(text) @@ plainto_tsquery(%s)
+                ORDER BY score DESC
+                LIMIT {top_k}""",
+                # f"'{query}'" is required in order to account for whitespace in query
+                (f"'{query}'", f"'{query}'"),
+            )
+
+            docs = []
+
+            for record in cur:
+                metadata, text, score = record
+                metadata["score"] = score
+                docs.append(Document(page_content=text, metadata=metadata))
+
+        return docs
 
     def delete(self) -> None:
         with self._get_cursor() as cur:

+ 0 - 4
api/tests/integration_tests/vdb/pgvector/test_pgvector.py

@@ -21,10 +21,6 @@ class PGVectorTest(AbstractVectorTest):
             ),
         )
 
-    def search_by_full_text(self):
-        hits_by_full_text: list[Document] = self.vector.search_by_full_text(query=get_example_text())
-        assert len(hits_by_full_text) == 0
-
 
 def test_pgvector(setup_mock_redis):
     PGVectorTest().run_all_tests()