Ver código fonte

fix detached instance error in keyword index create thread and fix question classifier node out of index error (#3219)

Jyong 1 ano atrás
pai commit 33ea689861

+ 6 - 3
api/core/indexing_runner.py

@@ -661,7 +661,7 @@ class IndexingRunner:
         # create keyword index
         # create keyword index
         create_keyword_thread = threading.Thread(target=self._process_keyword_index,
         create_keyword_thread = threading.Thread(target=self._process_keyword_index,
                                                  args=(current_app._get_current_object(),
                                                  args=(current_app._get_current_object(),
-                                                       dataset, dataset_document, documents))
+                                                       dataset.id, dataset_document.id, documents))
         create_keyword_thread.start()
         create_keyword_thread.start()
         if dataset.indexing_technique == 'high_quality':
         if dataset.indexing_technique == 'high_quality':
             with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
             with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
@@ -690,14 +690,17 @@ class IndexingRunner:
             }
             }
         )
         )
 
 
-    def _process_keyword_index(self, flask_app, dataset, dataset_document, documents):
+    def _process_keyword_index(self, flask_app, dataset_id, document_id, documents):
         with flask_app.app_context():
         with flask_app.app_context():
+            dataset = Dataset.query.filter_by(id=dataset_id).first()
+            if not dataset:
+                raise ValueError("no dataset found")
             keyword = Keyword(dataset)
             keyword = Keyword(dataset)
             keyword.create(documents)
             keyword.create(documents)
             if dataset.indexing_technique != 'high_quality':
             if dataset.indexing_technique != 'high_quality':
                 document_ids = [document.metadata['doc_id'] for document in documents]
                 document_ids = [document.metadata['doc_id'] for document in documents]
                 db.session.query(DocumentSegment).filter(
                 db.session.query(DocumentSegment).filter(
-                    DocumentSegment.document_id == dataset_document.id,
+                    DocumentSegment.document_id == document_id,
                     DocumentSegment.index_node_id.in_(document_ids),
                     DocumentSegment.index_node_id.in_(document_ids),
                     DocumentSegment.status == "indexing"
                     DocumentSegment.status == "indexing"
                 ).update({
                 ).update({

+ 3 - 1
api/core/workflow/nodes/question_classifier/question_classifier_node.py

@@ -65,7 +65,9 @@ class QuestionClassifierNode(LLMNode):
         categories = [_class.name for _class in node_data.classes]
         categories = [_class.name for _class in node_data.classes]
         try:
         try:
             result_text_json = json.loads(result_text.strip('```JSON\n'))
             result_text_json = json.loads(result_text.strip('```JSON\n'))
-            categories = result_text_json.get('categories', [])
+            categories_result = result_text_json.get('categories', [])
+            if categories_result:
+                categories = categories_result
         except Exception:
         except Exception:
             logging.error(f"Failed to parse result text: {result_text}")
             logging.error(f"Failed to parse result text: {result_text}")
         try:
         try: