Переглянути джерело

Add 2 firecrawl tools : Scrape and Search (#6016)

Co-authored-by: -LAN- <laipz8200@outlook.com>
ahasasjeb 9 місяців тому
батько
коміт
ab847c81fa

+ 26 - 0
api/core/tools/provider/builtin/firecrawl/tools/scrape.py

@@ -0,0 +1,26 @@
+import json
+from typing import Any, Union
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class ScrapeTool(BuiltinTool):
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        app = FirecrawlApp(api_key=self.runtime.credentials['firecrawl_api_key'], base_url=self.runtime.credentials['base_url'])
+
+        crawl_result = app.scrape_url(
+            url=tool_parameters['url'],
+            wait=True
+        )
+
+        if isinstance(crawl_result, dict):
+            result_message = json.dumps(crawl_result, ensure_ascii=False, indent=4)
+        else:
+            result_message = str(crawl_result)
+
+        if not crawl_result:
+            return self.create_text_message("Scrape request failed.")
+
+        return self.create_text_message(result_message)

+ 23 - 0
api/core/tools/provider/builtin/firecrawl/tools/scrape.yaml

@@ -0,0 +1,23 @@
+identity:
+  name: scrape
+  author: ahasasjeb
+  label:
+    en_US: Scrape
+    zh_Hans: 抓取
+description:
+  human:
+    en_US: Extract data from a single URL.
+    zh_Hans: 从单个URL抓取数据。
+  llm: This tool is designed to scrape a URL and output its content in Markdown format.
+parameters:
+  - name: url
+    type: string
+    required: true
+    label:
+      en_US: URL to scrape
+      zh_Hans: 要抓取的URL
+    human_description:
+      en_US: The URL of the website to scrape and extract data from.
+      zh_Hans: 要抓取并提取数据的网站URL。
+    llm_description: The URL of the website that needs to be crawled. This is a required parameter.
+    form: llm

+ 26 - 0
api/core/tools/provider/builtin/firecrawl/tools/search.py

@@ -0,0 +1,26 @@
+import json
+from typing import Any, Union
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.provider.builtin.firecrawl.firecrawl_appx import FirecrawlApp
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class SearchTool(BuiltinTool):
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        app = FirecrawlApp(api_key=self.runtime.credentials['firecrawl_api_key'], base_url=self.runtime.credentials['base_url'])
+
+        crawl_result = app.search(
+            query=tool_parameters['keyword'],
+            wait=True
+        )
+
+        if isinstance(crawl_result, dict):
+            result_message = json.dumps(crawl_result, ensure_ascii=False, indent=4)
+        else:
+            result_message = str(crawl_result)
+
+        if not crawl_result:
+            return self.create_text_message("Search request failed.")
+
+        return self.create_text_message(result_message)

+ 23 - 0
api/core/tools/provider/builtin/firecrawl/tools/search.yaml

@@ -0,0 +1,23 @@
+identity:
+  name: search
+  author: ahasasjeb
+  label:
+    en_US: Search
+    zh_Hans: 搜索
+description:
+  human:
+    en_US: Search online and output the results in Markdown format.
+    zh_Hans: 搜索,并且以Markdown格式输出
+  llm: This tool can perform online searches and convert the results to Markdown format.
+parameters:
+  - name: keyword
+    type: string
+    required: true
+    label:
+      en_US: keyword
+      zh_Hans: 关键词
+    human_description:
+      en_US: Input keywords to use Firecrawl API for search.
+      zh_Hans: 输入关键词即可使用Firecrawl API进行搜索。
+    llm_description: Efficiently extract keywords from user text.
+    form: llm