Przeglądaj źródła

Fix/hit test tsne issue (#2581)

Co-authored-by: jyong <jyong@dify.ai>
Jyong 1 rok temu
rodzic
commit
920b2c2b40

BIN
api/celerybeat-schedule.db


+ 1 - 1
api/core/features/annotation_reply.py

@@ -59,7 +59,7 @@ class AnnotationReplyFeature:
 
             documents = vector.search_by_vector(
                 query=query,
-                k=1,
+                top_k=1,
                 score_threshold=score_threshold,
                 filter={
                     'group_id': [dataset.id]

+ 2 - 2
api/core/rag/datasource/retrieval_service.py

@@ -101,7 +101,7 @@ class RetrievalService:
 
             documents = keyword.search(
                 query,
-                k=top_k
+                top_k=top_k
             )
             all_documents.extend(documents)
 
@@ -121,7 +121,7 @@ class RetrievalService:
             documents = vector.search_by_vector(
                 query,
                 search_type='similarity_score_threshold',
-                k=top_k,
+                top_k=top_k,
                 score_threshold=score_threshold,
                 filter={
                     'group_id': [dataset.id]

+ 3 - 2
api/services/hit_testing_service.py

@@ -133,8 +133,9 @@ class HitTestingService:
         if embedding_length <= 1:
             return [{'x': 0, 'y': 0}]
 
-        concatenate_data = np.array(embeddings).reshape(embedding_length, -1)
-        # concatenate_data = np.concatenate(embeddings)
+        noise = np.random.normal(0, 1e-4, np.array(embeddings).shape)
+        concatenate_data = np.array(embeddings) + noise
+        concatenate_data = concatenate_data.reshape(embedding_length, -1)
 
         perplexity = embedding_length / 2 + 1
         if perplexity >= embedding_length: