app_model_config_service.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. import re
  2. import uuid
  3. from core.external_data_tool.factory import ExternalDataToolFactory
  4. from core.moderation.factory import ModerationFactory
  5. from core.prompt.prompt_transform import AppMode
  6. from core.agent.agent_executor import PlanningStrategy
  7. from core.model_providers.model_provider_factory import ModelProviderFactory
  8. from core.model_providers.models.entity.model_params import ModelType, ModelMode
  9. from models.account import Account
  10. from services.dataset_service import DatasetService
  11. SUPPORT_TOOLS = ["dataset", "google_search", "web_reader", "wikipedia", "current_datetime"]
  12. class AppModelConfigService:
  13. @classmethod
  14. def is_dataset_exists(cls, account: Account, dataset_id: str) -> bool:
  15. # verify if the dataset ID exists
  16. dataset = DatasetService.get_dataset(dataset_id)
  17. if not dataset:
  18. return False
  19. if dataset.tenant_id != account.current_tenant_id:
  20. return False
  21. return True
  22. @classmethod
  23. def validate_model_completion_params(cls, cp: dict, model_name: str) -> dict:
  24. # 6. model.completion_params
  25. if not isinstance(cp, dict):
  26. raise ValueError("model.completion_params must be of object type")
  27. # max_tokens
  28. if 'max_tokens' not in cp:
  29. cp["max_tokens"] = 512
  30. # temperature
  31. if 'temperature' not in cp:
  32. cp["temperature"] = 1
  33. # top_p
  34. if 'top_p' not in cp:
  35. cp["top_p"] = 1
  36. # presence_penalty
  37. if 'presence_penalty' not in cp:
  38. cp["presence_penalty"] = 0
  39. # presence_penalty
  40. if 'frequency_penalty' not in cp:
  41. cp["frequency_penalty"] = 0
  42. # stop
  43. if 'stop' not in cp:
  44. cp["stop"] = []
  45. elif not isinstance(cp["stop"], list):
  46. raise ValueError("stop in model.completion_params must be of list type")
  47. if len(cp["stop"]) > 4:
  48. raise ValueError("stop sequences must be less than 4")
  49. # Filter out extra parameters
  50. filtered_cp = {
  51. "max_tokens": cp["max_tokens"],
  52. "temperature": cp["temperature"],
  53. "top_p": cp["top_p"],
  54. "presence_penalty": cp["presence_penalty"],
  55. "frequency_penalty": cp["frequency_penalty"],
  56. "stop": cp["stop"]
  57. }
  58. return filtered_cp
  59. @classmethod
  60. def validate_configuration(cls, tenant_id: str, account: Account, config: dict, mode: str) -> dict:
  61. # opening_statement
  62. if 'opening_statement' not in config or not config["opening_statement"]:
  63. config["opening_statement"] = ""
  64. if not isinstance(config["opening_statement"], str):
  65. raise ValueError("opening_statement must be of string type")
  66. # suggested_questions
  67. if 'suggested_questions' not in config or not config["suggested_questions"]:
  68. config["suggested_questions"] = []
  69. if not isinstance(config["suggested_questions"], list):
  70. raise ValueError("suggested_questions must be of list type")
  71. for question in config["suggested_questions"]:
  72. if not isinstance(question, str):
  73. raise ValueError("Elements in suggested_questions list must be of string type")
  74. # suggested_questions_after_answer
  75. if 'suggested_questions_after_answer' not in config or not config["suggested_questions_after_answer"]:
  76. config["suggested_questions_after_answer"] = {
  77. "enabled": False
  78. }
  79. if not isinstance(config["suggested_questions_after_answer"], dict):
  80. raise ValueError("suggested_questions_after_answer must be of dict type")
  81. if "enabled" not in config["suggested_questions_after_answer"] or not config["suggested_questions_after_answer"]["enabled"]:
  82. config["suggested_questions_after_answer"]["enabled"] = False
  83. if not isinstance(config["suggested_questions_after_answer"]["enabled"], bool):
  84. raise ValueError("enabled in suggested_questions_after_answer must be of boolean type")
  85. # speech_to_text
  86. if 'speech_to_text' not in config or not config["speech_to_text"]:
  87. config["speech_to_text"] = {
  88. "enabled": False
  89. }
  90. if not isinstance(config["speech_to_text"], dict):
  91. raise ValueError("speech_to_text must be of dict type")
  92. if "enabled" not in config["speech_to_text"] or not config["speech_to_text"]["enabled"]:
  93. config["speech_to_text"]["enabled"] = False
  94. if not isinstance(config["speech_to_text"]["enabled"], bool):
  95. raise ValueError("enabled in speech_to_text must be of boolean type")
  96. # return retriever resource
  97. if 'retriever_resource' not in config or not config["retriever_resource"]:
  98. config["retriever_resource"] = {
  99. "enabled": False
  100. }
  101. if not isinstance(config["retriever_resource"], dict):
  102. raise ValueError("retriever_resource must be of dict type")
  103. if "enabled" not in config["retriever_resource"] or not config["retriever_resource"]["enabled"]:
  104. config["retriever_resource"]["enabled"] = False
  105. if not isinstance(config["retriever_resource"]["enabled"], bool):
  106. raise ValueError("enabled in speech_to_text must be of boolean type")
  107. # more_like_this
  108. if 'more_like_this' not in config or not config["more_like_this"]:
  109. config["more_like_this"] = {
  110. "enabled": False
  111. }
  112. if not isinstance(config["more_like_this"], dict):
  113. raise ValueError("more_like_this must be of dict type")
  114. if "enabled" not in config["more_like_this"] or not config["more_like_this"]["enabled"]:
  115. config["more_like_this"]["enabled"] = False
  116. if not isinstance(config["more_like_this"]["enabled"], bool):
  117. raise ValueError("enabled in more_like_this must be of boolean type")
  118. # model
  119. if 'model' not in config:
  120. raise ValueError("model is required")
  121. if not isinstance(config["model"], dict):
  122. raise ValueError("model must be of object type")
  123. # model.provider
  124. model_provider_names = ModelProviderFactory.get_provider_names()
  125. if 'provider' not in config["model"] or config["model"]["provider"] not in model_provider_names:
  126. raise ValueError(f"model.provider is required and must be in {str(model_provider_names)}")
  127. # model.name
  128. if 'name' not in config["model"]:
  129. raise ValueError("model.name is required")
  130. model_provider = ModelProviderFactory.get_preferred_model_provider(tenant_id, config["model"]["provider"])
  131. if not model_provider:
  132. raise ValueError("model.name must be in the specified model list")
  133. model_list = model_provider.get_supported_model_list(ModelType.TEXT_GENERATION)
  134. model_ids = [m['id'] for m in model_list]
  135. if config["model"]["name"] not in model_ids:
  136. raise ValueError("model.name must be in the specified model list")
  137. # model.mode
  138. if 'mode' not in config['model'] or not config['model']["mode"]:
  139. config['model']["mode"] = ""
  140. # model.completion_params
  141. if 'completion_params' not in config["model"]:
  142. raise ValueError("model.completion_params is required")
  143. config["model"]["completion_params"] = cls.validate_model_completion_params(
  144. config["model"]["completion_params"],
  145. config["model"]["name"]
  146. )
  147. # user_input_form
  148. if "user_input_form" not in config or not config["user_input_form"]:
  149. config["user_input_form"] = []
  150. if not isinstance(config["user_input_form"], list):
  151. raise ValueError("user_input_form must be a list of objects")
  152. variables = []
  153. for item in config["user_input_form"]:
  154. key = list(item.keys())[0]
  155. if key not in ["text-input", "select", "paragraph"]:
  156. raise ValueError("Keys in user_input_form list can only be 'text-input', 'paragraph' or 'select'")
  157. form_item = item[key]
  158. if 'label' not in form_item:
  159. raise ValueError("label is required in user_input_form")
  160. if not isinstance(form_item["label"], str):
  161. raise ValueError("label in user_input_form must be of string type")
  162. if 'variable' not in form_item:
  163. raise ValueError("variable is required in user_input_form")
  164. if not isinstance(form_item["variable"], str):
  165. raise ValueError("variable in user_input_form must be of string type")
  166. pattern = re.compile(r"^(?!\d)[\u4e00-\u9fa5A-Za-z0-9_\U0001F300-\U0001F64F\U0001F680-\U0001F6FF]{1,100}$")
  167. if pattern.match(form_item["variable"]) is None:
  168. raise ValueError("variable in user_input_form must be a string, "
  169. "and cannot start with a number")
  170. variables.append(form_item["variable"])
  171. if 'required' not in form_item or not form_item["required"]:
  172. form_item["required"] = False
  173. if not isinstance(form_item["required"], bool):
  174. raise ValueError("required in user_input_form must be of boolean type")
  175. if key == "select":
  176. if 'options' not in form_item or not form_item["options"]:
  177. form_item["options"] = []
  178. if not isinstance(form_item["options"], list):
  179. raise ValueError("options in user_input_form must be a list of strings")
  180. if "default" in form_item and form_item['default'] \
  181. and form_item["default"] not in form_item["options"]:
  182. raise ValueError("default value in user_input_form must be in the options list")
  183. # pre_prompt
  184. if "pre_prompt" not in config or not config["pre_prompt"]:
  185. config["pre_prompt"] = ""
  186. if not isinstance(config["pre_prompt"], str):
  187. raise ValueError("pre_prompt must be of string type")
  188. # agent_mode
  189. if "agent_mode" not in config or not config["agent_mode"]:
  190. config["agent_mode"] = {
  191. "enabled": False,
  192. "tools": []
  193. }
  194. if not isinstance(config["agent_mode"], dict):
  195. raise ValueError("agent_mode must be of object type")
  196. if "enabled" not in config["agent_mode"] or not config["agent_mode"]["enabled"]:
  197. config["agent_mode"]["enabled"] = False
  198. if not isinstance(config["agent_mode"]["enabled"], bool):
  199. raise ValueError("enabled in agent_mode must be of boolean type")
  200. if "strategy" not in config["agent_mode"] or not config["agent_mode"]["strategy"]:
  201. config["agent_mode"]["strategy"] = PlanningStrategy.ROUTER.value
  202. if config["agent_mode"]["strategy"] not in [member.value for member in list(PlanningStrategy.__members__.values())]:
  203. raise ValueError("strategy in agent_mode must be in the specified strategy list")
  204. if "tools" not in config["agent_mode"] or not config["agent_mode"]["tools"]:
  205. config["agent_mode"]["tools"] = []
  206. if not isinstance(config["agent_mode"]["tools"], list):
  207. raise ValueError("tools in agent_mode must be a list of objects")
  208. for tool in config["agent_mode"]["tools"]:
  209. key = list(tool.keys())[0]
  210. if key not in SUPPORT_TOOLS:
  211. raise ValueError("Keys in agent_mode.tools must be in the specified tool list")
  212. tool_item = tool[key]
  213. if "enabled" not in tool_item or not tool_item["enabled"]:
  214. tool_item["enabled"] = False
  215. if not isinstance(tool_item["enabled"], bool):
  216. raise ValueError("enabled in agent_mode.tools must be of boolean type")
  217. if key == "dataset":
  218. if 'id' not in tool_item:
  219. raise ValueError("id is required in dataset")
  220. try:
  221. uuid.UUID(tool_item["id"])
  222. except ValueError:
  223. raise ValueError("id in dataset must be of UUID type")
  224. if not cls.is_dataset_exists(account, tool_item["id"]):
  225. raise ValueError("Dataset ID does not exist, please check your permission.")
  226. # dataset_query_variable
  227. cls.is_dataset_query_variable_valid(config, mode)
  228. # advanced prompt validation
  229. cls.is_advanced_prompt_valid(config, mode)
  230. # external data tools validation
  231. cls.is_external_data_tools_valid(tenant_id, config)
  232. # moderation validation
  233. cls.is_moderation_valid(tenant_id, config)
  234. # Filter out extra parameters
  235. filtered_config = {
  236. "opening_statement": config["opening_statement"],
  237. "suggested_questions": config["suggested_questions"],
  238. "suggested_questions_after_answer": config["suggested_questions_after_answer"],
  239. "speech_to_text": config["speech_to_text"],
  240. "retriever_resource": config["retriever_resource"],
  241. "more_like_this": config["more_like_this"],
  242. "sensitive_word_avoidance": config["sensitive_word_avoidance"],
  243. "external_data_tools": config["external_data_tools"],
  244. "model": {
  245. "provider": config["model"]["provider"],
  246. "name": config["model"]["name"],
  247. "mode": config['model']["mode"],
  248. "completion_params": config["model"]["completion_params"]
  249. },
  250. "user_input_form": config["user_input_form"],
  251. "dataset_query_variable": config.get('dataset_query_variable'),
  252. "pre_prompt": config["pre_prompt"],
  253. "agent_mode": config["agent_mode"],
  254. "prompt_type": config["prompt_type"],
  255. "chat_prompt_config": config["chat_prompt_config"],
  256. "completion_prompt_config": config["completion_prompt_config"],
  257. "dataset_configs": config["dataset_configs"]
  258. }
  259. return filtered_config
  260. @classmethod
  261. def is_moderation_valid(cls, tenant_id: str, config: dict):
  262. if 'sensitive_word_avoidance' not in config or not config["sensitive_word_avoidance"]:
  263. config["sensitive_word_avoidance"] = {
  264. "enabled": False
  265. }
  266. if not isinstance(config["sensitive_word_avoidance"], dict):
  267. raise ValueError("sensitive_word_avoidance must be of dict type")
  268. if "enabled" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["enabled"]:
  269. config["sensitive_word_avoidance"]["enabled"] = False
  270. if not config["sensitive_word_avoidance"]["enabled"]:
  271. return
  272. if "type" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["type"]:
  273. raise ValueError("sensitive_word_avoidance.type is required")
  274. type = config["sensitive_word_avoidance"]["type"]
  275. config = config["sensitive_word_avoidance"]["config"]
  276. ModerationFactory.validate_config(
  277. name=type,
  278. tenant_id=tenant_id,
  279. config=config
  280. )
  281. @classmethod
  282. def is_external_data_tools_valid(cls, tenant_id: str, config: dict):
  283. if 'external_data_tools' not in config or not config["external_data_tools"]:
  284. config["external_data_tools"] = []
  285. if not isinstance(config["external_data_tools"], list):
  286. raise ValueError("external_data_tools must be of list type")
  287. for tool in config["external_data_tools"]:
  288. if "enabled" not in tool or not tool["enabled"]:
  289. tool["enabled"] = False
  290. if not tool["enabled"]:
  291. continue
  292. if "type" not in tool or not tool["type"]:
  293. raise ValueError("external_data_tools[].type is required")
  294. type = tool["type"]
  295. config = tool["config"]
  296. ExternalDataToolFactory.validate_config(
  297. name=type,
  298. tenant_id=tenant_id,
  299. config=config
  300. )
  301. @classmethod
  302. def is_dataset_query_variable_valid(cls, config: dict, mode: str) -> None:
  303. # Only check when mode is completion
  304. if mode != 'completion':
  305. return
  306. agent_mode = config.get("agent_mode", {})
  307. tools = agent_mode.get("tools", [])
  308. dataset_exists = "dataset" in str(tools)
  309. dataset_query_variable = config.get("dataset_query_variable")
  310. if dataset_exists and not dataset_query_variable:
  311. raise ValueError("Dataset query variable is required when dataset is exist")
  312. @classmethod
  313. def is_advanced_prompt_valid(cls, config: dict, app_mode: str) -> None:
  314. # prompt_type
  315. if 'prompt_type' not in config or not config["prompt_type"]:
  316. config["prompt_type"] = "simple"
  317. if config['prompt_type'] not in ['simple', 'advanced']:
  318. raise ValueError("prompt_type must be in ['simple', 'advanced']")
  319. # chat_prompt_config
  320. if 'chat_prompt_config' not in config or not config["chat_prompt_config"]:
  321. config["chat_prompt_config"] = {}
  322. if not isinstance(config["chat_prompt_config"], dict):
  323. raise ValueError("chat_prompt_config must be of object type")
  324. # completion_prompt_config
  325. if 'completion_prompt_config' not in config or not config["completion_prompt_config"]:
  326. config["completion_prompt_config"] = {}
  327. if not isinstance(config["completion_prompt_config"], dict):
  328. raise ValueError("completion_prompt_config must be of object type")
  329. # dataset_configs
  330. if 'dataset_configs' not in config or not config["dataset_configs"]:
  331. config["dataset_configs"] = {"top_k": 2, "score_threshold": {"enable": False}}
  332. if not isinstance(config["dataset_configs"], dict):
  333. raise ValueError("dataset_configs must be of object type")
  334. if config['prompt_type'] == 'advanced':
  335. if not config['chat_prompt_config'] and not config['completion_prompt_config']:
  336. raise ValueError("chat_prompt_config or completion_prompt_config is required when prompt_type is advanced")
  337. if config['model']["mode"] not in ['chat', 'completion']:
  338. raise ValueError("model.mode must be in ['chat', 'completion'] when prompt_type is advanced")
  339. if app_mode == AppMode.CHAT.value and config['model']["mode"] == ModelMode.COMPLETION.value:
  340. user_prefix = config['completion_prompt_config']['conversation_histories_role']['user_prefix']
  341. assistant_prefix = config['completion_prompt_config']['conversation_histories_role']['assistant_prefix']
  342. if not user_prefix:
  343. config['completion_prompt_config']['conversation_histories_role']['user_prefix'] = 'Human'
  344. if not assistant_prefix:
  345. config['completion_prompt_config']['conversation_histories_role']['assistant_prefix'] = 'Assistant'
  346. if config['model']["mode"] == ModelMode.CHAT.value:
  347. prompt_list = config['chat_prompt_config']['prompt']
  348. if len(prompt_list) > 10:
  349. raise ValueError("prompt messages must be less than 10")