parser.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. import re
  2. import uuid
  3. from json import dumps as json_dumps
  4. from json import loads as json_loads
  5. from json.decoder import JSONDecodeError
  6. from typing import Optional
  7. from flask import request
  8. from requests import get
  9. from yaml import YAMLError, safe_load # type: ignore
  10. from core.tools.entities.common_entities import I18nObject
  11. from core.tools.entities.tool_bundle import ApiToolBundle
  12. from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
  13. from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
  14. class ApiBasedToolSchemaParser:
  15. @staticmethod
  16. def parse_openapi_to_tool_bundle(
  17. openapi: dict, extra_info: dict | None = None, warning: dict | None = None
  18. ) -> list[ApiToolBundle]:
  19. warning = warning if warning is not None else {}
  20. extra_info = extra_info if extra_info is not None else {}
  21. # set description to extra_info
  22. extra_info["description"] = openapi["info"].get("description", "")
  23. if len(openapi["servers"]) == 0:
  24. raise ToolProviderNotFoundError("No server found in the openapi yaml.")
  25. server_url = openapi["servers"][0]["url"]
  26. request_env = request.headers.get("X-Request-Env")
  27. if request_env:
  28. matched_servers = [server["url"] for server in openapi["servers"] if server["env"] == request_env]
  29. server_url = matched_servers[0] if matched_servers else server_url
  30. # list all interfaces
  31. interfaces = []
  32. for path, path_item in openapi["paths"].items():
  33. methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
  34. for method in methods:
  35. if method in path_item:
  36. interfaces.append(
  37. {
  38. "path": path,
  39. "method": method,
  40. "operation": path_item[method],
  41. }
  42. )
  43. # get all parameters
  44. bundles = []
  45. for interface in interfaces:
  46. # convert parameters
  47. parameters = []
  48. if "parameters" in interface["operation"]:
  49. for parameter in interface["operation"]["parameters"]:
  50. tool_parameter = ToolParameter(
  51. name=parameter["name"],
  52. label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
  53. human_description=I18nObject(
  54. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  55. ),
  56. type=ToolParameter.ToolParameterType.STRING,
  57. required=parameter.get("required", False),
  58. form=ToolParameter.ToolParameterForm.LLM,
  59. llm_description=parameter.get("description"),
  60. default=parameter["schema"]["default"]
  61. if "schema" in parameter and "default" in parameter["schema"]
  62. else None,
  63. placeholder=I18nObject(
  64. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  65. ),
  66. )
  67. # check if there is a type
  68. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
  69. if typ:
  70. tool_parameter.type = typ
  71. parameters.append(tool_parameter)
  72. # create tool bundle
  73. # check if there is a request body
  74. if "requestBody" in interface["operation"]:
  75. request_body = interface["operation"]["requestBody"]
  76. if "content" in request_body:
  77. for content_type, content in request_body["content"].items():
  78. # if there is a reference, get the reference and overwrite the content
  79. if "schema" not in content:
  80. continue
  81. if "$ref" in content["schema"]:
  82. # get the reference
  83. root = openapi
  84. reference = content["schema"]["$ref"].split("/")[1:]
  85. for ref in reference:
  86. root = root[ref]
  87. # overwrite the content
  88. interface["operation"]["requestBody"]["content"][content_type]["schema"] = root
  89. # parse body parameters
  90. if "schema" in interface["operation"]["requestBody"]["content"][content_type]:
  91. body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"]
  92. required = body_schema.get("required", [])
  93. properties = body_schema.get("properties", {})
  94. for name, property in properties.items():
  95. tool = ToolParameter(
  96. name=name,
  97. label=I18nObject(en_US=name, zh_Hans=name),
  98. human_description=I18nObject(
  99. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  100. ),
  101. type=ToolParameter.ToolParameterType.STRING,
  102. required=name in required,
  103. form=ToolParameter.ToolParameterForm.LLM,
  104. llm_description=property.get("description", ""),
  105. default=property.get("default", None),
  106. placeholder=I18nObject(
  107. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  108. ),
  109. )
  110. # check if there is a type
  111. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
  112. if typ:
  113. tool.type = typ
  114. parameters.append(tool)
  115. # check if parameters is duplicated
  116. parameters_count = {}
  117. for parameter in parameters:
  118. if parameter.name not in parameters_count:
  119. parameters_count[parameter.name] = 0
  120. parameters_count[parameter.name] += 1
  121. for name, count in parameters_count.items():
  122. if count > 1:
  123. warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
  124. # check if there is a operation id, use $path_$method as operation id if not
  125. if "operationId" not in interface["operation"]:
  126. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  127. path = interface["path"]
  128. if interface["path"].startswith("/"):
  129. path = interface["path"][1:]
  130. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  131. path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
  132. if not path:
  133. path = str(uuid.uuid4())
  134. interface["operation"]["operationId"] = f"{path}_{interface['method']}"
  135. bundles.append(
  136. ApiToolBundle(
  137. server_url=server_url + interface["path"],
  138. method=interface["method"],
  139. summary=interface["operation"]["description"]
  140. if "description" in interface["operation"]
  141. else interface["operation"].get("summary", None),
  142. operation_id=interface["operation"]["operationId"],
  143. parameters=parameters,
  144. author="",
  145. icon=None,
  146. openapi=interface["operation"],
  147. )
  148. )
  149. return bundles
  150. @staticmethod
  151. def _get_tool_parameter_type(parameter: dict) -> Optional[ToolParameter.ToolParameterType]:
  152. parameter = parameter or {}
  153. typ: Optional[str] = None
  154. if parameter.get("format") == "binary":
  155. return ToolParameter.ToolParameterType.FILE
  156. if "type" in parameter:
  157. typ = parameter["type"]
  158. elif "schema" in parameter and "type" in parameter["schema"]:
  159. typ = parameter["schema"]["type"]
  160. if typ in {"integer", "number"}:
  161. return ToolParameter.ToolParameterType.NUMBER
  162. elif typ == "boolean":
  163. return ToolParameter.ToolParameterType.BOOLEAN
  164. elif typ == "string":
  165. return ToolParameter.ToolParameterType.STRING
  166. else:
  167. return None
  168. @staticmethod
  169. def parse_openapi_yaml_to_tool_bundle(
  170. yaml: str, extra_info: dict | None = None, warning: dict | None = None
  171. ) -> list[ApiToolBundle]:
  172. """
  173. parse openapi yaml to tool bundle
  174. :param yaml: the yaml string
  175. :return: the tool bundle
  176. """
  177. warning = warning if warning is not None else {}
  178. extra_info = extra_info if extra_info is not None else {}
  179. openapi: dict = safe_load(yaml)
  180. if openapi is None:
  181. raise ToolApiSchemaError("Invalid openapi yaml.")
  182. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
  183. @staticmethod
  184. def parse_swagger_to_openapi(swagger: dict, extra_info: dict | None = None, warning: dict | None = None) -> dict:
  185. warning = warning or {}
  186. """
  187. parse swagger to openapi
  188. :param swagger: the swagger dict
  189. :return: the openapi dict
  190. """
  191. # convert swagger to openapi
  192. info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
  193. servers = swagger.get("servers", [])
  194. if len(servers) == 0:
  195. raise ToolApiSchemaError("No server found in the swagger yaml.")
  196. openapi = {
  197. "openapi": "3.0.0",
  198. "info": {
  199. "title": info.get("title", "Swagger"),
  200. "description": info.get("description", "Swagger"),
  201. "version": info.get("version", "1.0.0"),
  202. },
  203. "servers": swagger["servers"],
  204. "paths": {},
  205. "components": {"schemas": {}},
  206. }
  207. # check paths
  208. if "paths" not in swagger or len(swagger["paths"]) == 0:
  209. raise ToolApiSchemaError("No paths found in the swagger yaml.")
  210. # convert paths
  211. for path, path_item in swagger["paths"].items():
  212. openapi["paths"][path] = {}
  213. for method, operation in path_item.items():
  214. if "operationId" not in operation:
  215. raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
  216. if ("summary" not in operation or len(operation["summary"]) == 0) and (
  217. "description" not in operation or len(operation["description"]) == 0
  218. ):
  219. if warning is not None:
  220. warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
  221. openapi["paths"][path][method] = {
  222. "operationId": operation["operationId"],
  223. "summary": operation.get("summary", ""),
  224. "description": operation.get("description", ""),
  225. "parameters": operation.get("parameters", []),
  226. "responses": operation.get("responses", {}),
  227. }
  228. if "requestBody" in operation:
  229. openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
  230. # convert definitions
  231. for name, definition in swagger["definitions"].items():
  232. openapi["components"]["schemas"][name] = definition
  233. return openapi
  234. @staticmethod
  235. def parse_openai_plugin_json_to_tool_bundle(
  236. json: str, extra_info: dict | None = None, warning: dict | None = None
  237. ) -> list[ApiToolBundle]:
  238. """
  239. parse openapi plugin yaml to tool bundle
  240. :param json: the json string
  241. :return: the tool bundle
  242. """
  243. warning = warning if warning is not None else {}
  244. extra_info = extra_info if extra_info is not None else {}
  245. try:
  246. openai_plugin = json_loads(json)
  247. api = openai_plugin["api"]
  248. api_url = api["url"]
  249. api_type = api["type"]
  250. except JSONDecodeError:
  251. raise ToolProviderNotFoundError("Invalid openai plugin json.")
  252. if api_type != "openapi":
  253. raise ToolNotSupportedError("Only openapi is supported now.")
  254. # get openapi yaml
  255. response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
  256. if response.status_code != 200:
  257. raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
  258. return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
  259. response.text, extra_info=extra_info, warning=warning
  260. )
  261. @staticmethod
  262. def auto_parse_to_tool_bundle(
  263. content: str, extra_info: dict | None = None, warning: dict | None = None
  264. ) -> tuple[list[ApiToolBundle], str]:
  265. """
  266. auto parse to tool bundle
  267. :param content: the content
  268. :return: tools bundle, schema_type
  269. """
  270. warning = warning if warning is not None else {}
  271. extra_info = extra_info if extra_info is not None else {}
  272. content = content.strip()
  273. loaded_content = None
  274. json_error = None
  275. yaml_error = None
  276. try:
  277. loaded_content = json_loads(content)
  278. except JSONDecodeError as e:
  279. json_error = e
  280. if loaded_content is None:
  281. try:
  282. loaded_content = safe_load(content)
  283. except YAMLError as e:
  284. yaml_error = e
  285. if loaded_content is None:
  286. raise ToolApiSchemaError(
  287. f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
  288. f" yaml error: {str(yaml_error)}"
  289. )
  290. swagger_error = None
  291. openapi_error = None
  292. openapi_plugin_error = None
  293. schema_type = None
  294. try:
  295. openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  296. loaded_content, extra_info=extra_info, warning=warning
  297. )
  298. schema_type = ApiProviderSchemaType.OPENAPI.value
  299. return openapi, schema_type
  300. except ToolApiSchemaError as e:
  301. openapi_error = e
  302. # openai parse error, fallback to swagger
  303. try:
  304. converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
  305. loaded_content, extra_info=extra_info, warning=warning
  306. )
  307. schema_type = ApiProviderSchemaType.SWAGGER.value
  308. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  309. converted_swagger, extra_info=extra_info, warning=warning
  310. ), schema_type
  311. except ToolApiSchemaError as e:
  312. swagger_error = e
  313. # swagger parse error, fallback to openai plugin
  314. try:
  315. openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
  316. json_dumps(loaded_content), extra_info=extra_info, warning=warning
  317. )
  318. return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
  319. except ToolNotSupportedError as e:
  320. # maybe it's not plugin at all
  321. openapi_plugin_error = e
  322. raise ToolApiSchemaError(
  323. f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
  324. f" openapi plugin error: {str(openapi_plugin_error)}"
  325. )