datasets_document.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. # -*- coding:utf-8 -*-
  2. import random
  3. from datetime import datetime
  4. from flask import request
  5. from flask_login import login_required, current_user
  6. from flask_restful import Resource, fields, marshal, marshal_with, reqparse
  7. from sqlalchemy import desc, asc
  8. from werkzeug.exceptions import NotFound, Forbidden
  9. import services
  10. from controllers.console import api
  11. from controllers.console.app.error import ProviderNotInitializeError
  12. from controllers.console.datasets.error import DocumentAlreadyFinishedError, InvalidActionError, DocumentIndexingError, \
  13. InvalidMetadataError, ArchivedDocumentImmutableError
  14. from controllers.console.setup import setup_required
  15. from controllers.console.wraps import account_initialization_required
  16. from core.indexing_runner import IndexingRunner
  17. from core.llm.error import ProviderTokenNotInitError
  18. from extensions.ext_redis import redis_client
  19. from libs.helper import TimestampField
  20. from extensions.ext_database import db
  21. from models.dataset import DatasetProcessRule, Dataset
  22. from models.dataset import Document, DocumentSegment
  23. from models.model import UploadFile
  24. from services.dataset_service import DocumentService, DatasetService
  25. from tasks.add_document_to_index_task import add_document_to_index_task
  26. from tasks.remove_document_from_index_task import remove_document_from_index_task
# Marshalling schema for Dataset objects (used with flask_restful's
# marshal / marshal_with when returning datasets).
dataset_fields = {
    'id': fields.String,
    'name': fields.String,
    'description': fields.String,
    'permission': fields.String,
    'data_source_type': fields.String,
    'indexing_technique': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
}

# Marshalling schema for Document objects (list endpoint and create
# responses). Timestamps are rendered via TimestampField.
document_fields = {
    'id': fields.String,
    'position': fields.Integer,
    'data_source_type': fields.String,
    # data_source_info is stored serialized on the model; expose the parsed
    # dict via the data_source_info_dict attribute.
    'data_source_info': fields.Raw(attribute='data_source_info_dict'),
    'dataset_process_rule_id': fields.String,
    'name': fields.String,
    'created_from': fields.String,
    'created_by': fields.String,
    'created_at': TimestampField,
    'tokens': fields.Integer,
    'indexing_status': fields.String,
    'error': fields.String,
    'enabled': fields.Boolean,
    'disabled_at': TimestampField,
    'disabled_by': fields.String,
    'archived': fields.Boolean,
    'display_status': fields.String,
    'word_count': fields.Integer,
    'hit_count': fields.Integer,
}
  58. class DocumentResource(Resource):
  59. def get_document(self, dataset_id: str, document_id: str) -> Document:
  60. dataset = DatasetService.get_dataset(dataset_id)
  61. if not dataset:
  62. raise NotFound('Dataset not found.')
  63. try:
  64. DatasetService.check_dataset_permission(dataset, current_user)
  65. except services.errors.account.NoPermissionError as e:
  66. raise Forbidden(str(e))
  67. document = DocumentService.get_document(dataset_id, document_id)
  68. if not document:
  69. raise NotFound('Document not found.')
  70. if document.tenant_id != current_user.current_tenant_id:
  71. raise Forbidden('No permission.')
  72. return document
  73. class GetProcessRuleApi(Resource):
  74. @setup_required
  75. @login_required
  76. @account_initialization_required
  77. def get(self):
  78. req_data = request.args
  79. document_id = req_data.get('document_id')
  80. if document_id:
  81. # get the latest process rule
  82. document = Document.query.get_or_404(document_id)
  83. dataset = DatasetService.get_dataset(document.dataset_id)
  84. if not dataset:
  85. raise NotFound('Dataset not found.')
  86. try:
  87. DatasetService.check_dataset_permission(dataset, current_user)
  88. except services.errors.account.NoPermissionError as e:
  89. raise Forbidden(str(e))
  90. # get the latest process rule
  91. dataset_process_rule = db.session.query(DatasetProcessRule). \
  92. filter(DatasetProcessRule.dataset_id == document.dataset_id). \
  93. order_by(DatasetProcessRule.created_at.desc()). \
  94. limit(1). \
  95. one_or_none()
  96. mode = dataset_process_rule.mode
  97. rules = dataset_process_rule.rules_dict
  98. else:
  99. mode = DocumentService.DEFAULT_RULES['mode']
  100. rules = DocumentService.DEFAULT_RULES['rules']
  101. return {
  102. 'mode': mode,
  103. 'rules': rules
  104. }
class DatasetDocumentListApi(Resource):
    """List documents of a dataset, and create new documents in it."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, dataset_id):
        """Paginated document listing.

        Query params:
            page/limit: pagination (limit capped at 100 by paginate()).
            search: substring match on the document name.
            sort: '-created_at' (default), 'created_at' or 'hit_count';
                  a leading '-' means descending.
        """
        dataset_id = str(dataset_id)
        page = request.args.get('page', default=1, type=int)
        limit = request.args.get('limit', default=20, type=int)
        search = request.args.get('search', default=None, type=str)
        sort = request.args.get('sort', default='-created_at', type=str)

        dataset = DatasetService.get_dataset(dataset_id)
        if not dataset:
            raise NotFound('Dataset not found.')

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        # Scope by dataset AND tenant so cross-tenant ids never match.
        query = Document.query.filter_by(
            dataset_id=str(dataset_id), tenant_id=current_user.current_tenant_id)

        if search:
            search = f'%{search}%'
            query = query.filter(Document.name.like(search))

        # '-' prefix selects descending order.
        if sort.startswith('-'):
            sort_logic = desc
            sort = sort[1:]
        else:
            sort_logic = asc

        if sort == 'hit_count':
            # Per-document hit counts live on segments; aggregate them in a
            # subquery and outer-join so documents with no segments count as 0.
            sub_query = db.select(DocumentSegment.document_id,
                                  db.func.sum(DocumentSegment.hit_count).label("total_hit_count")) \
                .group_by(DocumentSegment.document_id) \
                .subquery()

            query = query.outerjoin(sub_query, sub_query.c.document_id == Document.id) \
                .order_by(sort_logic(db.func.coalesce(sub_query.c.total_hit_count, 0)))
        elif sort == 'created_at':
            query = query.order_by(sort_logic(Document.created_at))
        else:
            # Unknown sort keys fall back to newest-first.
            query = query.order_by(desc(Document.created_at))

        paginated_documents = query.paginate(
            page=page, per_page=limit, max_per_page=100, error_out=False)
        documents = paginated_documents.items

        response = {
            'data': marshal(documents, document_fields),
            # Heuristic: a full page implies more results may exist.
            'has_more': len(documents) == limit,
            'limit': limit,
            'total': paginated_documents.total,
            'page': page
        }

        return response

    @setup_required
    @login_required
    @account_initialization_required
    @marshal_with(document_fields)
    def post(self, dataset_id):
        """Create a document in the dataset from a data source + process rule.

        Raises NotFound/Forbidden on dataset/permission problems,
        ValueError when no indexing technique is available, and
        ProviderNotInitializeError when the LLM provider token is missing.
        """
        dataset_id = str(dataset_id)

        dataset = DatasetService.get_dataset(dataset_id)

        if not dataset:
            raise NotFound('Dataset not found.')

        # The role of the current user in the ta table must be admin or owner
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e))

        parser = reqparse.RequestParser()
        parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False,
                            location='json')
        parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
        parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
        parser.add_argument('duplicate', type=bool, nullable=False, location='json')
        args = parser.parse_args()

        # An indexing technique must come from either the dataset or the request.
        if not dataset.indexing_technique and not args['indexing_technique']:
            raise ValueError('indexing_technique is required.')

        # validate args
        DocumentService.document_create_args_validate(args)

        try:
            document = DocumentService.save_document_with_dataset_id(dataset, args, current_user)
        except ProviderTokenNotInitError:
            raise ProviderNotInitializeError()

        return document
  186. class DatasetInitApi(Resource):
  187. dataset_and_document_fields = {
  188. 'dataset': fields.Nested(dataset_fields),
  189. 'document': fields.Nested(document_fields)
  190. }
  191. @setup_required
  192. @login_required
  193. @account_initialization_required
  194. @marshal_with(dataset_and_document_fields)
  195. def post(self):
  196. # The role of the current user in the ta table must be admin or owner
  197. if current_user.current_tenant.current_role not in ['admin', 'owner']:
  198. raise Forbidden()
  199. parser = reqparse.RequestParser()
  200. parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, required=True,
  201. nullable=False, location='json')
  202. parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
  203. parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
  204. args = parser.parse_args()
  205. # validate args
  206. DocumentService.document_create_args_validate(args)
  207. try:
  208. dataset, document = DocumentService.save_document_without_dataset_id(
  209. tenant_id=current_user.current_tenant_id,
  210. document_data=args,
  211. account=current_user
  212. )
  213. except ProviderTokenNotInitError:
  214. raise ProviderNotInitializeError()
  215. response = {
  216. 'dataset': dataset,
  217. 'document': document
  218. }
  219. return response
  220. class DocumentIndexingEstimateApi(DocumentResource):
  221. @setup_required
  222. @login_required
  223. @account_initialization_required
  224. def get(self, dataset_id, document_id):
  225. dataset_id = str(dataset_id)
  226. document_id = str(document_id)
  227. document = self.get_document(dataset_id, document_id)
  228. if document.indexing_status in ['completed', 'error']:
  229. raise DocumentAlreadyFinishedError()
  230. data_process_rule = document.dataset_process_rule
  231. data_process_rule_dict = data_process_rule.to_dict()
  232. response = {
  233. "tokens": 0,
  234. "total_price": 0,
  235. "currency": "USD",
  236. "total_segments": 0,
  237. "preview": []
  238. }
  239. if document.data_source_type == 'upload_file':
  240. data_source_info = document.data_source_info_dict
  241. if data_source_info and 'upload_file_id' in data_source_info:
  242. file_id = data_source_info['upload_file_id']
  243. file = db.session.query(UploadFile).filter(
  244. UploadFile.tenant_id == document.tenant_id,
  245. UploadFile.id == file_id
  246. ).first()
  247. # raise error if file not found
  248. if not file:
  249. raise NotFound('File not found.')
  250. indexing_runner = IndexingRunner()
  251. response = indexing_runner.indexing_estimate(file, data_process_rule_dict)
  252. return response
  253. class DocumentIndexingStatusApi(DocumentResource):
  254. document_status_fields = {
  255. 'id': fields.String,
  256. 'indexing_status': fields.String,
  257. 'processing_started_at': TimestampField,
  258. 'parsing_completed_at': TimestampField,
  259. 'cleaning_completed_at': TimestampField,
  260. 'splitting_completed_at': TimestampField,
  261. 'completed_at': TimestampField,
  262. 'paused_at': TimestampField,
  263. 'error': fields.String,
  264. 'stopped_at': TimestampField,
  265. 'completed_segments': fields.Integer,
  266. 'total_segments': fields.Integer,
  267. }
  268. @setup_required
  269. @login_required
  270. @account_initialization_required
  271. def get(self, dataset_id, document_id):
  272. dataset_id = str(dataset_id)
  273. document_id = str(document_id)
  274. document = self.get_document(dataset_id, document_id)
  275. completed_segments = DocumentSegment.query \
  276. .filter(DocumentSegment.completed_at.isnot(None),
  277. DocumentSegment.document_id == str(document_id)) \
  278. .count()
  279. total_segments = DocumentSegment.query \
  280. .filter_by(document_id=str(document_id)) \
  281. .count()
  282. document.completed_segments = completed_segments
  283. document.total_segments = total_segments
  284. return marshal(document, self.document_status_fields)
  285. class DocumentDetailApi(DocumentResource):
  286. METADATA_CHOICES = {'all', 'only', 'without'}
  287. @setup_required
  288. @login_required
  289. @account_initialization_required
  290. def get(self, dataset_id, document_id):
  291. dataset_id = str(dataset_id)
  292. document_id = str(document_id)
  293. document = self.get_document(dataset_id, document_id)
  294. metadata = request.args.get('metadata', 'all')
  295. if metadata not in self.METADATA_CHOICES:
  296. raise InvalidMetadataError(f'Invalid metadata value: {metadata}')
  297. if metadata == 'only':
  298. response = {
  299. 'id': document.id,
  300. 'doc_type': document.doc_type,
  301. 'doc_metadata': document.doc_metadata
  302. }
  303. elif metadata == 'without':
  304. process_rules = DatasetService.get_process_rules(dataset_id)
  305. data_source_info = document.data_source_detail_dict
  306. response = {
  307. 'id': document.id,
  308. 'position': document.position,
  309. 'data_source_type': document.data_source_type,
  310. 'data_source_info': data_source_info,
  311. 'dataset_process_rule_id': document.dataset_process_rule_id,
  312. 'dataset_process_rule': process_rules,
  313. 'name': document.name,
  314. 'created_from': document.created_from,
  315. 'created_by': document.created_by,
  316. 'created_at': document.created_at.timestamp(),
  317. 'tokens': document.tokens,
  318. 'indexing_status': document.indexing_status,
  319. 'completed_at': int(document.completed_at.timestamp()) if document.completed_at else None,
  320. 'updated_at': int(document.updated_at.timestamp()) if document.updated_at else None,
  321. 'indexing_latency': document.indexing_latency,
  322. 'error': document.error,
  323. 'enabled': document.enabled,
  324. 'disabled_at': int(document.disabled_at.timestamp()) if document.disabled_at else None,
  325. 'disabled_by': document.disabled_by,
  326. 'archived': document.archived,
  327. 'segment_count': document.segment_count,
  328. 'average_segment_length': document.average_segment_length,
  329. 'hit_count': document.hit_count,
  330. 'display_status': document.display_status
  331. }
  332. else:
  333. process_rules = DatasetService.get_process_rules(dataset_id)
  334. data_source_info = document.data_source_detail_dict_()
  335. response = {
  336. 'id': document.id,
  337. 'position': document.position,
  338. 'data_source_type': document.data_source_type,
  339. 'data_source_info': data_source_info,
  340. 'dataset_process_rule_id': document.dataset_process_rule_id,
  341. 'dataset_process_rule': process_rules,
  342. 'name': document.name,
  343. 'created_from': document.created_from,
  344. 'created_by': document.created_by,
  345. 'created_at': document.created_at.timestamp(),
  346. 'tokens': document.tokens,
  347. 'indexing_status': document.indexing_status,
  348. 'completed_at': int(document.completed_at.timestamp())if document.completed_at else None,
  349. 'updated_at': int(document.updated_at.timestamp()) if document.updated_at else None,
  350. 'indexing_latency': document.indexing_latency,
  351. 'error': document.error,
  352. 'enabled': document.enabled,
  353. 'disabled_at': int(document.disabled_at.timestamp()) if document.disabled_at else None,
  354. 'disabled_by': document.disabled_by,
  355. 'archived': document.archived,
  356. 'doc_type': document.doc_type,
  357. 'doc_metadata': document.doc_metadata,
  358. 'segment_count': document.segment_count,
  359. 'average_segment_length': document.average_segment_length,
  360. 'hit_count': document.hit_count,
  361. 'display_status': document.display_status
  362. }
  363. return response, 200
  364. class DocumentProcessingApi(DocumentResource):
  365. @setup_required
  366. @login_required
  367. @account_initialization_required
  368. def patch(self, dataset_id, document_id, action):
  369. dataset_id = str(dataset_id)
  370. document_id = str(document_id)
  371. document = self.get_document(dataset_id, document_id)
  372. # The role of the current user in the ta table must be admin or owner
  373. if current_user.current_tenant.current_role not in ['admin', 'owner']:
  374. raise Forbidden()
  375. if action == "pause":
  376. if document.indexing_status != "indexing":
  377. raise InvalidActionError('Document not in indexing state.')
  378. document.paused_by = current_user.id
  379. document.paused_at = datetime.utcnow()
  380. document.is_paused = True
  381. db.session.commit()
  382. elif action == "resume":
  383. if document.indexing_status not in ["paused", "error"]:
  384. raise InvalidActionError('Document not in paused or error state.')
  385. document.paused_by = None
  386. document.paused_at = None
  387. document.is_paused = False
  388. db.session.commit()
  389. else:
  390. raise InvalidActionError()
  391. return {'result': 'success'}, 200
  392. class DocumentDeleteApi(DocumentResource):
  393. @setup_required
  394. @login_required
  395. @account_initialization_required
  396. def delete(self, dataset_id, document_id):
  397. dataset_id = str(dataset_id)
  398. document_id = str(document_id)
  399. document = self.get_document(dataset_id, document_id)
  400. try:
  401. DocumentService.delete_document(document)
  402. except services.errors.document.DocumentIndexingError:
  403. raise DocumentIndexingError('Cannot delete document during indexing.')
  404. return {'result': 'success'}, 204
  405. class DocumentMetadataApi(DocumentResource):
  406. @setup_required
  407. @login_required
  408. @account_initialization_required
  409. def put(self, dataset_id, document_id):
  410. dataset_id = str(dataset_id)
  411. document_id = str(document_id)
  412. document = self.get_document(dataset_id, document_id)
  413. req_data = request.get_json()
  414. doc_type = req_data.get('doc_type')
  415. doc_metadata = req_data.get('doc_metadata')
  416. # The role of the current user in the ta table must be admin or owner
  417. if current_user.current_tenant.current_role not in ['admin', 'owner']:
  418. raise Forbidden()
  419. if doc_type is None or doc_metadata is None:
  420. raise ValueError('Both doc_type and doc_metadata must be provided.')
  421. if doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
  422. raise ValueError('Invalid doc_type.')
  423. if not isinstance(doc_metadata, dict):
  424. raise ValueError('doc_metadata must be a dictionary.')
  425. metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]
  426. document.doc_metadata = {}
  427. for key, value_type in metadata_schema.items():
  428. value = doc_metadata.get(key)
  429. if value is not None and isinstance(value, value_type):
  430. document.doc_metadata[key] = value
  431. document.doc_type = doc_type
  432. document.updated_at = datetime.utcnow()
  433. db.session.commit()
  434. return {'result': 'success', 'message': 'Document metadata updated.'}, 200
class DocumentStatusApi(DocumentResource):
    """Enable, disable, or archive a document, synchronizing the search index
    via async tasks. A short-lived redis key guards against triggering the
    same document's (de)indexing twice."""

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, dataset_id, document_id, action):
        dataset_id = str(dataset_id)
        document_id = str(document_id)
        document = self.get_document(dataset_id, document_id)

        # The role of the current user in the ta table must be admin or owner
        if current_user.current_tenant.current_role not in ['admin', 'owner']:
            raise Forbidden()

        # Reject the request while a previous index/deindex task may still be
        # running for this document (key set below with a 600s TTL).
        indexing_cache_key = 'document_{}_indexing'.format(document.id)
        cache_result = redis_client.get(indexing_cache_key)
        if cache_result is not None:
            raise InvalidActionError("Document is being indexed, please try again later")

        if action == "enable":
            if document.enabled:
                raise InvalidActionError('Document already enabled.')

            document.enabled = True
            document.disabled_at = None
            document.disabled_by = None
            document.updated_at = datetime.utcnow()
            db.session.commit()

            # Set cache to prevent indexing the same document multiple times
            redis_client.setex(indexing_cache_key, 600, 1)

            # Re-add the document's segments to the search index asynchronously.
            add_document_to_index_task.delay(document_id)

            return {'result': 'success'}, 200

        elif action == "disable":
            if not document.enabled:
                raise InvalidActionError('Document already disabled.')

            document.enabled = False
            document.disabled_at = datetime.utcnow()
            document.disabled_by = current_user.id
            document.updated_at = datetime.utcnow()
            db.session.commit()

            # Set cache to prevent indexing the same document multiple times
            redis_client.setex(indexing_cache_key, 600, 1)

            # Remove the document's segments from the search index asynchronously.
            remove_document_from_index_task.delay(document_id)

            return {'result': 'success'}, 200

        elif action == "archive":
            if document.archived:
                raise InvalidActionError('Document already archived.')

            document.archived = True
            document.archived_at = datetime.utcnow()
            document.archived_by = current_user.id
            document.updated_at = datetime.utcnow()
            db.session.commit()

            # Only deindex if the document was enabled; a disabled document is
            # already absent from the index.
            if document.enabled:
                # Set cache to prevent indexing the same document multiple times
                redis_client.setex(indexing_cache_key, 600, 1)

                remove_document_from_index_task.delay(document_id)

            return {'result': 'success'}, 200
        else:
            raise InvalidActionError()
  489. class DocumentPauseApi(DocumentResource):
  490. def patch(self, dataset_id, document_id):
  491. """pause document."""
  492. dataset_id = str(dataset_id)
  493. document_id = str(document_id)
  494. dataset = DatasetService.get_dataset(dataset_id)
  495. if not dataset:
  496. raise NotFound('Dataset not found.')
  497. document = DocumentService.get_document(dataset.id, document_id)
  498. # 404 if document not found
  499. if document is None:
  500. raise NotFound("Document Not Exists.")
  501. # 403 if document is archived
  502. if DocumentService.check_archived(document):
  503. raise ArchivedDocumentImmutableError()
  504. try:
  505. # pause document
  506. DocumentService.pause_document(document)
  507. except services.errors.document.DocumentIndexingError:
  508. raise DocumentIndexingError('Cannot pause completed document.')
  509. return {'result': 'success'}, 204
  510. class DocumentRecoverApi(DocumentResource):
  511. def patch(self, dataset_id, document_id):
  512. """recover document."""
  513. dataset_id = str(dataset_id)
  514. document_id = str(document_id)
  515. dataset = DatasetService.get_dataset(dataset_id)
  516. if not dataset:
  517. raise NotFound('Dataset not found.')
  518. document = DocumentService.get_document(dataset.id, document_id)
  519. # 404 if document not found
  520. if document is None:
  521. raise NotFound("Document Not Exists.")
  522. # 403 if document is archived
  523. if DocumentService.check_archived(document):
  524. raise ArchivedDocumentImmutableError()
  525. try:
  526. # pause document
  527. DocumentService.recover_document(document)
  528. except services.errors.document.DocumentIndexingError:
  529. raise DocumentIndexingError('Document is not in paused status.')
  530. return {'result': 'success'}, 204
# Route registrations for the console dataset-document API.
api.add_resource(GetProcessRuleApi, '/datasets/process-rule')
api.add_resource(DatasetDocumentListApi,
                 '/datasets/<uuid:dataset_id>/documents')
api.add_resource(DatasetInitApi,
                 '/datasets/init')
api.add_resource(DocumentIndexingEstimateApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-estimate')
api.add_resource(DocumentIndexingStatusApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status')
# NOTE: DocumentDetailApi (GET) and DocumentDeleteApi (DELETE) deliberately
# share one URL; flask_restful routes each HTTP method to the resource that
# implements it.
api.add_resource(DocumentDetailApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>')
api.add_resource(DocumentProcessingApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/<string:action>')
api.add_resource(DocumentDeleteApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>')
api.add_resource(DocumentMetadataApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata')
api.add_resource(DocumentStatusApi,
                 '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>')
api.add_resource(DocumentPauseApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause')
api.add_resource(DocumentRecoverApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume')