app_runner.py

import time
from typing import Generator, List, Optional, Tuple, Union, cast

from core.application_queue_manager import ApplicationQueueManager, PublishFrom
from core.entities.application_entities import (ApplicationGenerateEntity, AppOrchestrationConfigEntity,
                                                ExternalDataVariableEntity, InvokeFrom, ModelConfigEntity,
                                                PromptTemplateEntity)
from core.features.annotation_reply import AnnotationReplyFeature
from core.features.external_data_fetch import ExternalDataFetchFeature
from core.features.hosting_moderation import HostingModerationFeature
from core.features.moderation import ModerationFeature
from core.file.file_obj import FileObj
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.prompt.prompt_transform import PromptTransform
from models.model import App, Message, MessageAnnotation


class AppRunner:
    def get_pre_calculate_rest_tokens(self, app_record: App,
                                      model_config: ModelConfigEntity,
                                      prompt_template_entity: PromptTemplateEntity,
                                      inputs: dict[str, str],
                                      files: list[FileObj],
                                      query: Optional[str] = None) -> int:
        """
        Pre-calculate the rest tokens available for generation: the model's
        context size minus the reserved max_tokens and the prompt tokens.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: rest tokens, or -1 if the model does not declare a context size
        """
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        # resolve the configured max_tokens parameter, whatever name the provider uses
        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query
        )

        prompt_tokens = model_type_instance.get_num_tokens(
            model_config.model,
            model_config.credentials,
            prompt_messages
        )

        rest_tokens = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError("Query or prefix prompt is too long, you can reduce the prefix prompt, "
                                        "shrink max_tokens, or switch to an LLM with a larger context size.")

        return rest_tokens
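
    # Worked example (hypothetical numbers): with a 4096-token context window,
    # max_tokens=512 and a 1000-token prompt, rest_tokens = 4096 - 512 - 1000 = 2584.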

    def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
                              prompt_messages: List[PromptMessage]):
        # recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's context size
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_type_instance.get_num_tokens(
            model_config.model,
            model_config.credentials,
            prompt_messages
        )

        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            # write the clamped value back under the provider's parameter name
            for parameter_rule in model_config.model_schema.parameter_rules:
                if (parameter_rule.name == 'max_tokens'
                        or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                    model_config.parameters[parameter_rule.name] = max_tokens
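
    # Worked example (hypothetical numbers): with a 4096-token context window,
    # a 4000-token prompt and max_tokens=512, 4000 + 512 > 4096, so max_tokens is
    # clamped to max(4096 - 4000, 16) = 96; a floor of 16 tokens is always kept.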

    def organize_prompt_messages(self, app_record: App,
                                 model_config: ModelConfigEntity,
                                 prompt_template_entity: PromptTemplateEntity,
                                 inputs: dict[str, str],
                                 files: list[FileObj],
                                 query: Optional[str] = None,
                                 context: Optional[str] = None,
                                 memory: Optional[TokenBufferMemory] = None) \
            -> Tuple[List[PromptMessage], Optional[List[str]]]:
        """
        Organize prompt messages
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context
        :param memory: memory
        :return: prompt messages and optional stop words
        """
        prompt_transform = PromptTransform()

        # get prompt without memory and context
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            # simple mode: the transform derives both the messages and the stop words
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=app_record.mode,
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
        else:
            # advanced mode: stop words come from the model config instead
            prompt_messages = prompt_transform.get_advanced_prompt(
                app_mode=app_record.mode,
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query,
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
            stop = model_config.stop

        return prompt_messages, stop

    def direct_output(self, queue_manager: ApplicationQueueManager,
                      app_orchestration_config: AppOrchestrationConfigEntity,
                      prompt_messages: list,
                      text: str,
                      stream: bool,
                      usage: Optional[LLMUsage] = None) -> None:
        """
        Publish a fixed text directly as the model output, optionally simulating streaming.
        :param queue_manager: application queue manager
        :param app_orchestration_config: app orchestration config
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage
        :return:
        """
        if stream:
            # simulate streaming by publishing the text character by character
            index = 0
            for token in text:
                queue_manager.publish_chunk_message(LLMResultChunk(
                    model=app_orchestration_config.model_config.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=index,
                        message=AssistantPromptMessage(content=token)
                    )
                ), PublishFrom.APPLICATION_MANAGER)
                index += 1
                time.sleep(0.01)

        queue_manager.publish_message_end(
            llm_result=LLMResult(
                model=app_orchestration_config.model_config.model,
                prompt_messages=prompt_messages,
                message=AssistantPromptMessage(content=text),
                usage=usage if usage else LLMUsage.empty_usage()
            ),
            pub_from=PublishFrom.APPLICATION_MANAGER
        )
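
    # For example, direct_output(..., text="Hi!", stream=True) publishes three
    # single-character chunks (index 0..2) roughly 10 ms apart, then a
    # message-end event carrying the full text and usage.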

    def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
                              queue_manager: ApplicationQueueManager,
                              stream: bool,
                              agent: bool = False) -> None:
        """
        Handle invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: whether the result comes from an agent run
        :return:
        """
        if not stream:
            self._handle_invoke_result_direct(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )
        else:
            self._handle_invoke_result_stream(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )

    def _handle_invoke_result_direct(self, invoke_result: LLMResult,
                                     queue_manager: ApplicationQueueManager,
                                     agent: bool) -> None:
        """
        Handle a blocking (non-streaming) invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the result comes from an agent run
        :return:
        """
        queue_manager.publish_message_end(
            llm_result=invoke_result,
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result_stream(self, invoke_result: Generator,
                                     queue_manager: ApplicationQueueManager,
                                     agent: bool) -> None:
        """
        Handle a streaming invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the result comes from an agent run
        :return:
        """
        model = None
        prompt_messages = []
        text = ''
        usage = None
        for result in invoke_result:
            if not agent:
                queue_manager.publish_chunk_message(result, PublishFrom.APPLICATION_MANAGER)
            else:
                queue_manager.publish_agent_chunk_message(result, PublishFrom.APPLICATION_MANAGER)

            # accumulate the full text and capture model/prompt/usage from the chunks
            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if not usage and result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        llm_result = LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(content=text),
            usage=usage
        )

        queue_manager.publish_message_end(
            llm_result=llm_result,
            pub_from=PublishFrom.APPLICATION_MANAGER
        )
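
    # For example, chunks whose deltas carry "Hel" and "lo" are re-published as-is
    # and folded into a final LLMResult with message content "Hello"; model,
    # prompt_messages and usage are taken from the first chunk that provides them.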

    def moderation_for_inputs(self, app_id: str,
                              tenant_id: str,
                              app_orchestration_config_entity: AppOrchestrationConfigEntity,
                              inputs: dict,
                              query: str) -> Tuple[bool, dict, str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_orchestration_config_entity: app orchestration config entity
        :param inputs: inputs
        :param query: query
        :return:
        """
        moderation_feature = ModerationFeature()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_orchestration_config_entity=app_orchestration_config_entity,
            inputs=inputs,
            query=query,
        )

    def check_hosting_moderation(self, application_generate_entity: ApplicationGenerateEntity,
                                 queue_manager: ApplicationQueueManager,
                                 prompt_messages: list[PromptMessage]) -> bool:
        """
        Check hosting moderation
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return: True if the content was flagged and a canned refusal was output
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity,
            prompt_messages=prompt_messages
        )

        if moderation_result:
            # flagged: short-circuit with a canned refusal instead of calling the model
            self.direct_output(
                queue_manager=queue_manager,
                app_orchestration_config=application_generate_entity.app_orchestration_config_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, "
                     "but I'm an AI assistant designed to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream
            )

        return moderation_result

    def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
                                                app_id: str,
                                                external_data_tools: list[ExternalDataVariableEntity],
                                                inputs: dict,
                                                query: str) -> dict:
        """
        Fill in variable inputs from external data tools, if any are configured.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetchFeature()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id,
            app_id=app_id,
            external_data_tools=external_data_tools,
            inputs=inputs,
            query=query
        )

    def query_app_annotations_to_reply(self, app_record: App,
                                       message: Message,
                                       query: str,
                                       user_id: str,
                                       invoke_from: InvokeFrom) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return: the matching annotation, if any
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record,
            message=message,
            query=query,
            user_id=user_id,
            invoke_from=invoke_from
        )
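

# Illustrative call order inside a concrete runner (an assumption based on the
# method names above, not prescribed by this file):
#   1. moderation_for_inputs(...)                    pre-check user inputs/query
#   2. fill_in_inputs_from_external_data_tools(...)  enrich inputs
#   3. query_app_annotations_to_reply(...)           short-circuit on annotation hit
#   4. organize_prompt_messages(...)                 build prompt + stop words
#   5. check_hosting_moderation(...)                 canned refusal if flagged
#   6. recale_llm_max_tokens(...)                    clamp max_tokens to context
#   7. _handle_invoke_result(...)                    publish chunks / final result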