parser.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. import re
  2. import uuid
  3. from json import dumps as json_dumps
  4. from json import loads as json_loads
  5. from json.decoder import JSONDecodeError
  6. from typing import Optional
  7. from requests import get
  8. from yaml import YAMLError, safe_load # type: ignore
  9. from core.tools.entities.common_entities import I18nObject
  10. from core.tools.entities.tool_bundle import ApiToolBundle
  11. from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
  12. from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
  13. class ApiBasedToolSchemaParser:
  14. @staticmethod
  15. def parse_openapi_to_tool_bundle(
  16. openapi: dict, extra_info: Optional[dict], warning: Optional[dict]
  17. ) -> list[ApiToolBundle]:
  18. warning = warning if warning is not None else {}
  19. extra_info = extra_info if extra_info is not None else {}
  20. # set description to extra_info
  21. extra_info["description"] = openapi["info"].get("description", "")
  22. if len(openapi["servers"]) == 0:
  23. raise ToolProviderNotFoundError("No server found in the openapi yaml.")
  24. server_url = openapi["servers"][0]["url"]
  25. # list all interfaces
  26. interfaces = []
  27. for path, path_item in openapi["paths"].items():
  28. methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
  29. for method in methods:
  30. if method in path_item:
  31. interfaces.append(
  32. {
  33. "path": path,
  34. "method": method,
  35. "operation": path_item[method],
  36. }
  37. )
  38. # get all parameters
  39. bundles = []
  40. for interface in interfaces:
  41. # convert parameters
  42. parameters = []
  43. if "parameters" in interface["operation"]:
  44. for parameter in interface["operation"]["parameters"]:
  45. tool_parameter = ToolParameter(
  46. name=parameter["name"],
  47. label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
  48. human_description=I18nObject(
  49. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  50. ),
  51. type=ToolParameter.ToolParameterType.STRING,
  52. required=parameter.get("required", False),
  53. form=ToolParameter.ToolParameterForm.LLM,
  54. llm_description=parameter.get("description"),
  55. default=parameter["schema"]["default"]
  56. if "schema" in parameter and "default" in parameter["schema"]
  57. else None,
  58. placeholder=I18nObject(
  59. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  60. ),
  61. )
  62. # check if there is a type
  63. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
  64. if typ:
  65. tool_parameter.type = typ
  66. parameters.append(tool_parameter)
  67. # create tool bundle
  68. # check if there is a request body
  69. if "requestBody" in interface["operation"]:
  70. request_body = interface["operation"]["requestBody"]
  71. if "content" in request_body:
  72. for content_type, content in request_body["content"].items():
  73. # if there is a reference, get the reference and overwrite the content
  74. if "schema" not in content:
  75. continue
  76. if "$ref" in content["schema"]:
  77. # get the reference
  78. root = openapi
  79. reference = content["schema"]["$ref"].split("/")[1:]
  80. for ref in reference:
  81. root = root[ref]
  82. # overwrite the content
  83. interface["operation"]["requestBody"]["content"][content_type]["schema"] = root
  84. # parse body parameters
  85. if "schema" in interface["operation"]["requestBody"]["content"][content_type]:
  86. body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"]
  87. required = body_schema.get("required", [])
  88. properties = body_schema.get("properties", {})
  89. for name, property in properties.items():
  90. tool = ToolParameter(
  91. name=name,
  92. label=I18nObject(en_US=name, zh_Hans=name),
  93. human_description=I18nObject(
  94. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  95. ),
  96. type=ToolParameter.ToolParameterType.STRING,
  97. required=name in required,
  98. form=ToolParameter.ToolParameterForm.LLM,
  99. llm_description=property.get("description", ""),
  100. default=property.get("default", None),
  101. placeholder=I18nObject(
  102. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  103. ),
  104. )
  105. # check if there is a type
  106. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
  107. if typ:
  108. tool.type = typ
  109. parameters.append(tool)
  110. # check if parameters is duplicated
  111. parameters_count = {}
  112. for parameter in parameters:
  113. if parameter.name not in parameters_count:
  114. parameters_count[parameter.name] = 0
  115. parameters_count[parameter.name] += 1
  116. for name, count in parameters_count.items():
  117. if count > 1:
  118. warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
  119. # check if there is a operation id, use $path_$method as operation id if not
  120. if "operationId" not in interface["operation"]:
  121. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  122. path = interface["path"]
  123. if interface["path"].startswith("/"):
  124. path = interface["path"][1:]
  125. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  126. path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
  127. if not path:
  128. path = str(uuid.uuid4())
  129. interface["operation"]["operationId"] = f'{path}_{interface["method"]}'
  130. bundles.append(
  131. ApiToolBundle(
  132. server_url=server_url + interface["path"],
  133. method=interface["method"],
  134. summary=interface["operation"]["description"]
  135. if "description" in interface["operation"]
  136. else interface["operation"].get("summary", None),
  137. operation_id=interface["operation"]["operationId"],
  138. parameters=parameters,
  139. author="",
  140. icon=None,
  141. openapi=interface["operation"],
  142. )
  143. )
  144. return bundles
  145. @staticmethod
  146. def _get_tool_parameter_type(parameter: dict) -> Optional[ToolParameter.ToolParameterType]:
  147. parameter = parameter or {}
  148. typ: Optional[str] = None
  149. if parameter.get("format") == "binary":
  150. return ToolParameter.ToolParameterType.FILE
  151. if "type" in parameter:
  152. typ = parameter["type"]
  153. elif "schema" in parameter and "type" in parameter["schema"]:
  154. typ = parameter["schema"]["type"]
  155. if typ in {"integer", "number"}:
  156. return ToolParameter.ToolParameterType.NUMBER
  157. elif typ == "boolean":
  158. return ToolParameter.ToolParameterType.BOOLEAN
  159. elif typ == "string":
  160. return ToolParameter.ToolParameterType.STRING
  161. else:
  162. return None
  163. @staticmethod
  164. def parse_openapi_yaml_to_tool_bundle(
  165. yaml: str, extra_info: Optional[dict], warning: Optional[dict]
  166. ) -> list[ApiToolBundle]:
  167. """
  168. parse openapi yaml to tool bundle
  169. :param yaml: the yaml string
  170. :return: the tool bundle
  171. """
  172. warning = warning if warning is not None else {}
  173. extra_info = extra_info if extra_info is not None else {}
  174. openapi: dict = safe_load(yaml)
  175. if openapi is None:
  176. raise ToolApiSchemaError("Invalid openapi yaml.")
  177. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
  178. @staticmethod
  179. def parse_swagger_to_openapi(swagger: dict, extra_info: Optional[dict], warning: Optional[dict]) -> dict:
  180. """
  181. parse swagger to openapi
  182. :param swagger: the swagger dict
  183. :return: the openapi dict
  184. """
  185. # convert swagger to openapi
  186. info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
  187. servers = swagger.get("servers", [])
  188. if len(servers) == 0:
  189. raise ToolApiSchemaError("No server found in the swagger yaml.")
  190. openapi = {
  191. "openapi": "3.0.0",
  192. "info": {
  193. "title": info.get("title", "Swagger"),
  194. "description": info.get("description", "Swagger"),
  195. "version": info.get("version", "1.0.0"),
  196. },
  197. "servers": swagger["servers"],
  198. "paths": {},
  199. "components": {"schemas": {}},
  200. }
  201. # check paths
  202. if "paths" not in swagger or len(swagger["paths"]) == 0:
  203. raise ToolApiSchemaError("No paths found in the swagger yaml.")
  204. # convert paths
  205. for path, path_item in swagger["paths"].items():
  206. openapi["paths"][path] = {}
  207. for method, operation in path_item.items():
  208. if "operationId" not in operation:
  209. raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
  210. if ("summary" not in operation or len(operation["summary"]) == 0) and (
  211. "description" not in operation or len(operation["description"]) == 0
  212. ):
  213. if warning is not None:
  214. warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
  215. openapi["paths"][path][method] = {
  216. "operationId": operation["operationId"],
  217. "summary": operation.get("summary", ""),
  218. "description": operation.get("description", ""),
  219. "parameters": operation.get("parameters", []),
  220. "responses": operation.get("responses", {}),
  221. }
  222. if "requestBody" in operation:
  223. openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
  224. # convert definitions
  225. for name, definition in swagger["definitions"].items():
  226. openapi["components"]["schemas"][name] = definition
  227. return openapi
  228. @staticmethod
  229. def parse_openai_plugin_json_to_tool_bundle(
  230. json: str, extra_info: Optional[dict], warning: Optional[dict]
  231. ) -> list[ApiToolBundle]:
  232. """
  233. parse openapi plugin yaml to tool bundle
  234. :param json: the json string
  235. :return: the tool bundle
  236. """
  237. warning = warning if warning is not None else {}
  238. extra_info = extra_info if extra_info is not None else {}
  239. try:
  240. openai_plugin = json_loads(json)
  241. api = openai_plugin["api"]
  242. api_url = api["url"]
  243. api_type = api["type"]
  244. except:
  245. raise ToolProviderNotFoundError("Invalid openai plugin json.")
  246. if api_type != "openapi":
  247. raise ToolNotSupportedError("Only openapi is supported now.")
  248. # get openapi yaml
  249. response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
  250. if response.status_code != 200:
  251. raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
  252. return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
  253. response.text, extra_info=extra_info, warning=warning
  254. )
  255. @staticmethod
  256. def auto_parse_to_tool_bundle(
  257. content: str, extra_info: Optional[dict] = None, warning: Optional[dict] = None
  258. ) -> tuple[list[ApiToolBundle], str]:
  259. """
  260. auto parse to tool bundle
  261. :param content: the content
  262. :return: tools bundle, schema_type
  263. """
  264. warning = warning if warning is not None else {}
  265. extra_info = extra_info if extra_info is not None else {}
  266. content = content.strip()
  267. loaded_content = None
  268. json_error = None
  269. yaml_error = None
  270. try:
  271. loaded_content = json_loads(content)
  272. except JSONDecodeError as e:
  273. json_error = e
  274. if loaded_content is None:
  275. try:
  276. loaded_content = safe_load(content)
  277. except YAMLError as e:
  278. yaml_error = e
  279. if loaded_content is None:
  280. raise ToolApiSchemaError(
  281. f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
  282. f" yaml error: {str(yaml_error)}"
  283. )
  284. swagger_error = None
  285. openapi_error = None
  286. openapi_plugin_error = None
  287. schema_type = None
  288. try:
  289. openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  290. loaded_content, extra_info=extra_info, warning=warning
  291. )
  292. schema_type = ApiProviderSchemaType.OPENAPI.value
  293. return openapi, schema_type
  294. except ToolApiSchemaError as e:
  295. openapi_error = e
  296. # openai parse error, fallback to swagger
  297. try:
  298. converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
  299. loaded_content, extra_info=extra_info, warning=warning
  300. )
  301. schema_type = ApiProviderSchemaType.SWAGGER.value
  302. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  303. converted_swagger, extra_info=extra_info, warning=warning
  304. ), schema_type
  305. except ToolApiSchemaError as e:
  306. swagger_error = e
  307. # swagger parse error, fallback to openai plugin
  308. try:
  309. openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
  310. json_dumps(loaded_content), extra_info=extra_info, warning=warning
  311. )
  312. return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
  313. except ToolNotSupportedError as e:
  314. # maybe it's not plugin at all
  315. openapi_plugin_error = e
  316. raise ToolApiSchemaError(
  317. f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
  318. f" openapi plugin error: {str(openapi_plugin_error)}"
  319. )