app_runner.py

import time
from typing import Generator, List, Optional, Tuple, Union, cast

from core.application_queue_manager import ApplicationQueueManager, PublishFrom
from core.entities.application_entities import AppOrchestrationConfigEntity, ModelConfigEntity, \
    PromptTemplateEntity, ExternalDataVariableEntity, ApplicationGenerateEntity, InvokeFrom
from core.features.annotation_reply import AnnotationReplyFeature
from core.features.external_data_fetch import ExternalDataFetchFeature
from core.features.hosting_moderation import HostingModerationFeature
from core.features.moderation import ModerationFeature
from core.file.file_obj import FileObj
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.prompt.prompt_transform import PromptTransform
from models.model import App, Message, MessageAnnotation


class AppRunner:
    def get_pre_calculate_rest_tokens(self, app_record: App,
                                      model_config: ModelConfigEntity,
                                      prompt_template_entity: PromptTemplateEntity,
                                      inputs: dict[str, str],
                                      files: list[FileObj],
                                      query: Optional[str] = None) -> int:
        """
        Pre-calculate the number of context tokens left for retrieval and memory.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: remaining token budget, or -1 if the model declares no context size
        """
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

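        # scan the parameter rules for the completion-token limit; some providers expose
        # it under the native name 'max_tokens', others map it via the 'max_tokens' template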
        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query
        )

        prompt_tokens = model_type_instance.get_num_tokens(
            model_config.model,
            model_config.credentials,
            prompt_messages
        )

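        # the rest budget is what remains of the context window after reserving the
        # completion limit (max_tokens) and counting the prompt itself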
        rest_tokens = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError("Query or prefix prompt is too long. Reduce the prefix prompt, "
                                        "lower max_tokens, or switch to a model with a larger context size.")

        return rest_tokens

    def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
                              prompt_messages: List[PromptMessage]):
        # recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's context size
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_type_instance.get_num_tokens(
            model_config.model,
            model_config.credentials,
            prompt_messages
        )

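        # if the reserved completion budget no longer fits the context window, shrink it
        # in place, keeping a floor of 16 tokens so the model can still produce an answer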
        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if (parameter_rule.name == 'max_tokens'
                        or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                    model_config.parameters[parameter_rule.name] = max_tokens

    def organize_prompt_messages(self, app_record: App,
                                 model_config: ModelConfigEntity,
                                 prompt_template_entity: PromptTemplateEntity,
                                 inputs: dict[str, str],
                                 files: list[FileObj],
                                 query: Optional[str] = None,
                                 context: Optional[str] = None,
                                 memory: Optional[TokenBufferMemory] = None) \
            -> Tuple[List[PromptMessage], Optional[List[str]]]:
        """
        Organize prompt messages.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context
        :param memory: memory
        :return: prompt messages and optional stop words
        """
        prompt_transform = PromptTransform()

        # build prompt messages according to the prompt template type
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=app_record.mode,
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
        else:
            prompt_messages = prompt_transform.get_advanced_prompt(
                app_mode=app_record.mode,
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query,
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
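            # advanced prompt templates don't carry their own stop words,
            # so take them from the model configuration instead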
            stop = model_config.stop

        return prompt_messages, stop

    def direct_output(self, queue_manager: ApplicationQueueManager,
                      app_orchestration_config: AppOrchestrationConfigEntity,
                      prompt_messages: list,
                      text: str,
                      stream: bool,
                      usage: Optional[LLMUsage] = None) -> None:
        """
        Publish a fixed text response directly, bypassing model invocation.
        :param queue_manager: application queue manager
        :param app_orchestration_config: app orchestration config
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage
        :return:
        """
        if stream:
            index = 0
            for token in text:
                queue_manager.publish_chunk_message(LLMResultChunk(
                    model=app_orchestration_config.model_config.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=index,
                        message=AssistantPromptMessage(content=token)
                    )
                ), PublishFrom.APPLICATION_MANAGER)
                index += 1
                time.sleep(0.01)

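        # a message-end event is published in both modes so downstream
        # consumers can finalize the message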
        queue_manager.publish_message_end(
            llm_result=LLMResult(
                model=app_orchestration_config.model_config.model,
                prompt_messages=prompt_messages,
                message=AssistantPromptMessage(content=text),
                usage=usage if usage else LLMUsage.empty_usage()
            ),
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
                              queue_manager: ApplicationQueueManager,
                              stream: bool,
                              agent: bool = False) -> None:
        """
        Handle invoke result.
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: whether the invocation comes from an agent
        :return:
        """
        if not stream:
            self._handle_invoke_result_direct(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )
        else:
            self._handle_invoke_result_stream(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )

    def _handle_invoke_result_direct(self, invoke_result: LLMResult,
                                     queue_manager: ApplicationQueueManager,
                                     agent: bool) -> None:
        """
        Handle a blocking (non-streaming) invoke result.
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the invocation comes from an agent
        :return:
        """
        queue_manager.publish_message_end(
            llm_result=invoke_result,
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result_stream(self, invoke_result: Generator,
                                     queue_manager: ApplicationQueueManager,
                                     agent: bool) -> None:
        """
        Handle a streaming invoke result.
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the invocation comes from an agent
        :return:
        """
        model = None
        prompt_messages = []
        text = ''
        usage = None
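        # forward each chunk downstream while accumulating the text, model name,
        # prompt messages, and usage needed to publish a complete LLMResult at the end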
        for result in invoke_result:
            if not agent:
                queue_manager.publish_chunk_message(result, PublishFrom.APPLICATION_MANAGER)
            else:
                queue_manager.publish_agent_chunk_message(result, PublishFrom.APPLICATION_MANAGER)

            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if not usage and result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(content=text),
            usage=usage
        )

        queue_manager.publish_message_end(
            llm_result=llm_result,
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def moderation_for_inputs(self, app_id: str,
                              tenant_id: str,
                              app_orchestration_config_entity: AppOrchestrationConfigEntity,
                              inputs: dict,
                              query: str) -> Tuple[bool, dict, str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_orchestration_config_entity: app orchestration config entity
        :param inputs: inputs
        :param query: query
        :return: a (flagged, inputs, query) tuple
        """
        moderation_feature = ModerationFeature()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_orchestration_config_entity=app_orchestration_config_entity,
            inputs=inputs,
            query=query,
        )

    def check_hosting_moderation(self, application_generate_entity: ApplicationGenerateEntity,
                                 queue_manager: ApplicationQueueManager,
                                 prompt_messages: list[PromptMessage]) -> bool:
        """
        Check hosting moderation.
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return: True if the prompt was flagged by hosting moderation
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity,
            prompt_messages=prompt_messages
        )

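        # when hosting moderation flags the prompt, short-circuit with a canned
        # refusal instead of invoking the model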
        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_orchestration_config=application_generate_entity.app_orchestration_config_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, "
                     "but I'm an AI assistant designed to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream
            )

        return moderation_result

    def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
                                                app_id: str,
                                                external_data_tools: list[ExternalDataVariableEntity],
                                                inputs: dict,
                                                query: str) -> dict:
        """
        Fill in variable inputs from external data tools, if any are configured.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tool configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetchFeature()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id,
            app_id=app_id,
            external_data_tools=external_data_tools,
            inputs=inputs,
            query=query
        )

    def query_app_annotations_to_reply(self, app_record: App,
                                       message: Message,
                                       query: str,
                                       user_id: str,
                                       invoke_from: InvokeFrom) -> Optional[MessageAnnotation]:
        """
        Query app annotations for a matching reply.
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return: the matched annotation, or None
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record,
            message=message,
            query=query,
            user_id=user_id,
            invoke_from=invoke_from
        )