# hit_testing.py (4.1 KB)

  1. import logging
  2. from flask_login import current_user
  3. from core.login.login import login_required
  4. from flask_restful import Resource, reqparse, marshal, fields
  5. from werkzeug.exceptions import InternalServerError, NotFound, Forbidden
  6. import services
  7. from controllers.console import api
  8. from controllers.console.app.error import ProviderNotInitializeError, ProviderQuotaExceededError, \
  9. ProviderModelCurrentlyNotSupportError
  10. from controllers.console.datasets.error import HighQualityDatasetOnlyError, DatasetNotInitializedError
  11. from controllers.console.setup import setup_required
  12. from controllers.console.wraps import account_initialization_required
  13. from core.model_providers.error import ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError, \
  14. LLMBadRequestError
  15. from libs.helper import TimestampField
  16. from services.dataset_service import DatasetService
  17. from services.hit_testing_service import HitTestingService
  18. document_fields = {
  19. 'id': fields.String,
  20. 'data_source_type': fields.String,
  21. 'name': fields.String,
  22. 'doc_type': fields.String,
  23. }
  24. segment_fields = {
  25. 'id': fields.String,
  26. 'position': fields.Integer,
  27. 'document_id': fields.String,
  28. 'content': fields.String,
  29. 'answer': fields.String,
  30. 'word_count': fields.Integer,
  31. 'tokens': fields.Integer,
  32. 'keywords': fields.List(fields.String),
  33. 'index_node_id': fields.String,
  34. 'index_node_hash': fields.String,
  35. 'hit_count': fields.Integer,
  36. 'enabled': fields.Boolean,
  37. 'disabled_at': TimestampField,
  38. 'disabled_by': fields.String,
  39. 'status': fields.String,
  40. 'created_by': fields.String,
  41. 'created_at': TimestampField,
  42. 'indexing_at': TimestampField,
  43. 'completed_at': TimestampField,
  44. 'error': fields.String,
  45. 'stopped_at': TimestampField,
  46. 'document': fields.Nested(document_fields),
  47. }
  48. hit_testing_record_fields = {
  49. 'segment': fields.Nested(segment_fields),
  50. 'score': fields.Float,
  51. 'tsne_position': fields.Raw
  52. }
  53. class HitTestingApi(Resource):
  54. @setup_required
  55. @login_required
  56. @account_initialization_required
  57. def post(self, dataset_id):
  58. dataset_id_str = str(dataset_id)
  59. dataset = DatasetService.get_dataset(dataset_id_str)
  60. if dataset is None:
  61. raise NotFound("Dataset not found.")
  62. try:
  63. DatasetService.check_dataset_permission(dataset, current_user)
  64. except services.errors.account.NoPermissionError as e:
  65. raise Forbidden(str(e))
  66. # only high quality dataset can be used for hit testing
  67. if dataset.indexing_technique != 'high_quality':
  68. raise HighQualityDatasetOnlyError()
  69. parser = reqparse.RequestParser()
  70. parser.add_argument('query', type=str, location='json')
  71. args = parser.parse_args()
  72. query = args['query']
  73. if not query or len(query) > 250:
  74. raise ValueError('Query is required and cannot exceed 250 characters')
  75. try:
  76. response = HitTestingService.retrieve(
  77. dataset=dataset,
  78. query=query,
  79. account=current_user,
  80. limit=10,
  81. )
  82. return {"query": response['query'], 'records': marshal(response['records'], hit_testing_record_fields)}
  83. except services.errors.index.IndexNotInitializedError:
  84. raise DatasetNotInitializedError()
  85. except ProviderTokenNotInitError as ex:
  86. raise ProviderNotInitializeError(ex.description)
  87. except QuotaExceededError:
  88. raise ProviderQuotaExceededError()
  89. except ModelCurrentlyNotSupportError:
  90. raise ProviderModelCurrentlyNotSupportError()
  91. except LLMBadRequestError:
  92. raise ProviderNotInitializeError(
  93. f"No Embedding Model available. Please configure a valid provider "
  94. f"in the Settings -> Model Provider.")
  95. except ValueError as e:
  96. raise ValueError(str(e))
  97. except Exception as e:
  98. logging.exception("Hit testing failed.")
  99. raise InternalServerError(str(e))
  100. api.add_resource(HitTestingApi, '/datasets/<uuid:dataset_id>/hit-testing')