Browse Source

Fix crash when a dataset document is None (#15818)

Jyong 1 month ago
parent
commit
84a866028a

+ 2 - 0
api/core/rag/datasource/retrieval_service.py

@@ -276,6 +276,8 @@ class RetrievalService:
                     continue
 
                 dataset_document = dataset_documents[document_id]
+                if not dataset_document:
+                    continue
 
                 if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
                     # Handle parent-child documents

+ 23 - 20
api/core/rag/retrieval/dataset_retrieval.py

@@ -433,30 +433,33 @@ class DatasetRetrieval:
                 dataset_document = DatasetDocument.query.filter(
                     DatasetDocument.id == document.metadata["document_id"]
                 ).first()
-                if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
-                    child_chunk = ChildChunk.query.filter(
-                        ChildChunk.index_node_id == document.metadata["doc_id"],
-                        ChildChunk.dataset_id == dataset_document.dataset_id,
-                        ChildChunk.document_id == dataset_document.id,
-                    ).first()
-                    if child_chunk:
-                        segment = DocumentSegment.query.filter(DocumentSegment.id == child_chunk.segment_id).update(
-                            {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False
+                if dataset_document:
+                    if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
+                        child_chunk = ChildChunk.query.filter(
+                            ChildChunk.index_node_id == document.metadata["doc_id"],
+                            ChildChunk.dataset_id == dataset_document.dataset_id,
+                            ChildChunk.document_id == dataset_document.id,
+                        ).first()
+                        if child_chunk:
+                            segment = DocumentSegment.query.filter(DocumentSegment.id == child_chunk.segment_id).update(
+                                {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False
+                            )
+                            db.session.commit()
+                    else:
+                        query = db.session.query(DocumentSegment).filter(
+                            DocumentSegment.index_node_id == document.metadata["doc_id"]
                         )
-                        db.session.commit()
-                else:
-                    query = db.session.query(DocumentSegment).filter(
-                        DocumentSegment.index_node_id == document.metadata["doc_id"]
-                    )
 
-                    # if 'dataset_id' in document.metadata:
-                    if "dataset_id" in document.metadata:
-                        query = query.filter(DocumentSegment.dataset_id == document.metadata["dataset_id"])
+                        # if 'dataset_id' in document.metadata:
+                        if "dataset_id" in document.metadata:
+                            query = query.filter(DocumentSegment.dataset_id == document.metadata["dataset_id"])
 
-                    # add hit count to document segment
-                    query.update({DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False)
+                        # add hit count to document segment
+                        query.update(
+                            {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False
+                        )
 
-                db.session.commit()
+                    db.session.commit()
 
         # get tracing instance
         trace_manager: TraceQueueManager | None = (