
fix qa index processor tenant id is None error (#2713)

Co-authored-by: jyong <jyong@dify.ai>
Jyong committed 1 year ago
Commit 31070ffbca

+ 7 - 4
api/core/indexing_runner.py

@@ -62,7 +62,8 @@ class IndexingRunner:
                 text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
 
                 # transform
-                documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
+                documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
+                                            processing_rule.to_dict())
                 # save segment
                 self._load_segments(dataset, dataset_document, documents)
 
@@ -120,7 +121,8 @@ class IndexingRunner:
             text_docs = self._extract(index_processor, dataset_document, processing_rule.to_dict())
 
             # transform
-            documents = self._transform(index_processor, dataset, text_docs, processing_rule.to_dict())
+            documents = self._transform(index_processor, dataset, text_docs, dataset_document.doc_language,
+                                        processing_rule.to_dict())
             # save segment
             self._load_segments(dataset, dataset_document, documents)
 
@@ -750,7 +752,7 @@ class IndexingRunner:
         index_processor.load(dataset, documents)
 
     def _transform(self, index_processor: BaseIndexProcessor, dataset: Dataset,
-                   text_docs: list[Document], process_rule: dict) -> list[Document]:
+                   text_docs: list[Document], doc_language: str, process_rule: dict) -> list[Document]:
         # get embedding model instance
         embedding_model_instance = None
         if dataset.indexing_technique == 'high_quality':
@@ -768,7 +770,8 @@ class IndexingRunner:
                 )
 
         documents = index_processor.transform(text_docs, embedding_model_instance=embedding_model_instance,
-                                              process_rule=process_rule)
+                                              process_rule=process_rule, tenant_id=dataset.tenant_id,
+                                              doc_language=doc_language)
 
         return documents
 
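The indexing_runner.py change threads two extra values from the runner down into the index processor: the dataset's tenant_id and the document's doc_language, passed as keyword arguments to index_processor.transform(). Below is a minimal, self-contained sketch of that kwargs-threading pattern; the class and field names are simplified stand-ins for illustration, not Dify's actual API.

    # Illustrative sketch only: simplified stand-ins for IndexingRunner._transform
    # and the index processor's transform(); not the real Dify classes.
    from dataclasses import dataclass


    @dataclass
    class DatasetStub:
        tenant_id: str


    class ProcessorStub:
        def transform(self, documents: list[str], **kwargs) -> list[str]:
            # The processor now reads the tenant from kwargs instead of a
            # request-bound user object, so it also works in background workers.
            tenant_id = kwargs.get("tenant_id")
            if tenant_id is None:
                raise ValueError("tenant_id must be passed by the caller")
            doc_language = kwargs.get("doc_language", "English")
            return [f"[{tenant_id}/{doc_language}] {doc}" for doc in documents]


    def _transform(processor: ProcessorStub, dataset: DatasetStub,
                   text_docs: list[str], doc_language: str, process_rule: dict) -> list[str]:
        # Mirrors the patched signature: doc_language is a new positional parameter,
        # and tenant_id is forwarded from the dataset rather than the login session.
        return processor.transform(text_docs,
                                   process_rule=process_rule,
                                   tenant_id=dataset.tenant_id,
                                   doc_language=doc_language)


    if __name__ == "__main__":
        out = _transform(ProcessorStub(), DatasetStub(tenant_id="t-123"),
                         ["What is RAG?"], "English", {"mode": "automatic"})
        print(out)  # ['[t-123/English] What is RAG?']
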

+ 3 - 4
api/core/rag/index_processor/processor/qa_index_processor.py

@@ -7,7 +7,6 @@ from typing import Optional
 
 import pandas as pd
 from flask import Flask, current_app
-from flask_login import current_user
 from werkzeug.datastructures import FileStorage
 
 from core.generator.llm_generator import LLMGenerator
@@ -31,7 +30,7 @@ class QAIndexProcessor(BaseIndexProcessor):
 
     def transform(self, documents: list[Document], **kwargs) -> list[Document]:
         splitter = self._get_splitter(processing_rule=kwargs.get('process_rule'),
-                                      embedding_model_instance=None)
+                                      embedding_model_instance=kwargs.get('embedding_model_instance'))
 
         # Split the text documents into nodes.
         all_documents = []
@@ -66,10 +65,10 @@ class QAIndexProcessor(BaseIndexProcessor):
             for doc in sub_documents:
                 document_format_thread = threading.Thread(target=self._format_qa_document, kwargs={
                     'flask_app': current_app._get_current_object(),
-                    'tenant_id': current_user.current_tenant.id,
+                    'tenant_id': kwargs.get('tenant_id'),
                     'document_node': doc,
                     'all_qa_documents': all_qa_documents,
-                    'document_language': kwargs.get('document_language', 'English')})
+                    'document_language': kwargs.get('doc_language', 'English')})
                 threads.append(document_format_thread)
                 document_format_thread.start()
             for thread in threads:
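
The root cause: QAIndexProcessor.transform() previously read current_user.current_tenant.id from flask_login, which is only populated inside an authenticated request context. The indexing runner typically executes in a background worker, where no such context exists, so the tenant id resolved to None. The fix passes tenant_id explicitly through kwargs and hands it to the per-chunk formatting threads the same way (note the kwarg key read is now 'doc_language', matching what the runner sends). A minimal sketch of that pattern, with assumed names standing in for the real _format_qa_document:

    # Illustrative sketch (assumed names): hand tenant_id to worker threads via kwargs
    # instead of reading flask_login's current_user, which is unavailable outside a request.
    import threading


    def format_qa_document(tenant_id: str, document_node: str,
                           all_qa_documents: list, document_language: str) -> None:
        # Stand-in for QAIndexProcessor._format_qa_document; the real method calls an
        # LLM with the tenant's credentials, here we only record which tenant was used.
        all_qa_documents.append((tenant_id, document_language, document_node))


    def transform(documents: list[str], **kwargs) -> list:
        all_qa_documents: list = []
        threads = []
        for doc in documents:
            thread = threading.Thread(target=format_qa_document, kwargs={
                # tenant_id now comes from the caller (the indexing runner), not the session
                "tenant_id": kwargs.get("tenant_id"),
                "document_node": doc,
                "all_qa_documents": all_qa_documents,
                # read 'doc_language', the key the runner actually passes
                "document_language": kwargs.get("doc_language", "English"),
            })
            threads.append(thread)
            thread.start()
        for thread in threads:
            thread.join()
        return all_qa_documents


    if __name__ == "__main__":
        print(transform(["chunk one", "chunk two"],
                        tenant_id="t-123", doc_language="English"))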