|
@@ -661,7 +661,7 @@ class IndexingRunner:
|
|
# create keyword index
|
|
# create keyword index
|
|
create_keyword_thread = threading.Thread(target=self._process_keyword_index,
|
|
create_keyword_thread = threading.Thread(target=self._process_keyword_index,
|
|
args=(current_app._get_current_object(),
|
|
args=(current_app._get_current_object(),
|
|
- dataset, dataset_document, documents))
|
|
|
|
|
|
+ dataset.id, dataset_document.id, documents))
|
|
create_keyword_thread.start()
|
|
create_keyword_thread.start()
|
|
if dataset.indexing_technique == 'high_quality':
|
|
if dataset.indexing_technique == 'high_quality':
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
|
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
|
|
@@ -690,14 +690,17 @@ class IndexingRunner:
|
|
}
|
|
}
|
|
)
|
|
)
|
|
|
|
|
|
- def _process_keyword_index(self, flask_app, dataset, dataset_document, documents):
|
|
|
|
|
|
+ def _process_keyword_index(self, flask_app, dataset_id, document_id, documents):
|
|
with flask_app.app_context():
|
|
with flask_app.app_context():
|
|
|
|
+ dataset = Dataset.query.filter_by(id=dataset_id).first()
|
|
|
|
+ if not dataset:
|
|
|
|
+ raise ValueError("no dataset found")
|
|
keyword = Keyword(dataset)
|
|
keyword = Keyword(dataset)
|
|
keyword.create(documents)
|
|
keyword.create(documents)
|
|
if dataset.indexing_technique != 'high_quality':
|
|
if dataset.indexing_technique != 'high_quality':
|
|
document_ids = [document.metadata['doc_id'] for document in documents]
|
|
document_ids = [document.metadata['doc_id'] for document in documents]
|
|
db.session.query(DocumentSegment).filter(
|
|
db.session.query(DocumentSegment).filter(
|
|
- DocumentSegment.document_id == dataset_document.id,
|
|
|
|
|
|
+ DocumentSegment.document_id == document_id,
|
|
DocumentSegment.index_node_id.in_(document_ids),
|
|
DocumentSegment.index_node_id.in_(document_ids),
|
|
DocumentSegment.status == "indexing"
|
|
DocumentSegment.status == "indexing"
|
|
).update({
|
|
).update({
|