Forráskód Böngészése

add CrossRef builtin tool: doi query and title query (#7406)

RookieAgent 8 hónapja
szülő
commit
4ff4859036

+ 49 - 0
api/core/tools/provider/builtin/crossref/_assets/icon.svg

@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
+	 viewBox="0 0 200 130.2" style="enable-background:new 0 0 200 130.2;" xml:space="preserve">
+<style type="text/css">
+	.st0{fill:#3EB1C8;}
+	.st1{fill:#D8D2C4;}
+	.st2{fill:#4F5858;}
+	.st3{fill:#FFC72C;}
+	.st4{fill:#EF3340;}
+</style>
+<g>
+	<polygon class="st0" points="111.8,95.5 111.8,66.8 135.4,59 177.2,73.3 	"/>
+	<polygon class="st1" points="153.6,36.8 111.8,51.2 135.4,59 177.2,44.6 	"/>
+	<polygon class="st2" points="135.4,59 177.2,44.6 177.2,73.3 	"/>
+	<polygon class="st3" points="177.2,0.3 177.2,29 153.6,36.8 111.8,22.5 	"/>
+	<polygon class="st4" points="153.6,36.8 111.8,51.2 111.8,22.5 	"/>
+	<g>
+		<g>
+			<g>
+				<g>
+					<path class="st2" d="M26.3,104.8c-0.5-3.7-4.1-6.5-8.1-6.5c-7.3,0-10.1,6.2-10.1,12.7c0,6.2,2.8,12.4,10.1,12.4
+						c5,0,7.8-3.4,8.4-8.3h7.9c-0.8,9.2-7.2,15.2-16.3,15.2C6.8,130.2,0,121.7,0,111c0-11,6.8-19.6,18.2-19.6c8.2,0,15,4.8,16,13.3
+						H26.3z"/>
+					<path class="st2" d="M37.4,102.5h7v5h0.1c1.4-3.4,5-5.7,8.6-5.7c0.5,0,1.1,0.1,1.6,0.3v6.9c-0.7-0.2-1.8-0.3-2.6-0.3
+						c-5.4,0-7.3,3.9-7.3,8.6v12.1h-7.4V102.5z"/>
+					<path class="st2" d="M68.7,101.8c8.5,0,13.9,5.6,13.9,14.2c0,8.5-5.5,14.1-13.9,14.1c-8.4,0-13.9-5.6-13.9-14.1
+						C54.9,107.4,60.3,101.8,68.7,101.8z M68.7,124.5c5,0,6.5-4.3,6.5-8.6c0-4.3-1.5-8.6-6.5-8.6c-5,0-6.5,4.3-6.5,8.6
+						C62.2,120.2,63.8,124.5,68.7,124.5z"/>
+					<path class="st2" d="M91.2,120.6c0.1,3.2,2.8,4.5,5.7,4.5c2.1,0,4.8-0.8,4.8-3.4c0-2.2-3.1-3-8.4-4.2c-4.3-0.9-8.5-2.4-8.5-7.2
+						c0-6.9,5.9-8.6,11.7-8.6c5.9,0,11.3,2,11.8,8.6h-7c-0.2-2.9-2.4-3.6-5-3.6c-1.7,0-4.1,0.3-4.1,2.5c0,2.6,4.2,3,8.4,4
+						c4.3,1,8.5,2.5,8.5,7.5c0,7.1-6.1,9.3-12.3,9.3c-6.2,0-12.3-2.3-12.6-9.5H91.2z"/>
+					<path class="st2" d="M118.1,120.6c0.1,3.2,2.8,4.5,5.7,4.5c2.1,0,4.8-0.8,4.8-3.4c0-2.2-3.1-3-8.4-4.2
+						c-4.3-0.9-8.5-2.4-8.5-7.2c0-6.9,5.9-8.6,11.7-8.6c5.9,0,11.3,2,11.8,8.6h-7c-0.2-2.9-2.4-3.6-5-3.6c-1.7,0-4.1,0.3-4.1,2.5
+						c0,2.6,4.2,3,8.4,4c4.3,1,8.5,2.5,8.5,7.5c0,7.1-6.1,9.3-12.3,9.3c-6.2,0-12.3-2.3-12.6-9.5H118.1z"/>
+					<path class="st2" d="M138.4,102.5h7v5h0.1c1.4-3.4,5-5.7,8.6-5.7c0.5,0,1.1,0.1,1.6,0.3v6.9c-0.7-0.2-1.8-0.3-2.6-0.3
+						c-5.4,0-7.3,3.9-7.3,8.6v12.1h-7.4V102.5z"/>
+					<path class="st2" d="M163.7,117.7c0.2,4.7,2.5,6.8,6.6,6.8c3,0,5.3-1.8,5.8-3.5h6.5c-2.1,6.3-6.5,9-12.6,9
+						c-8.5,0-13.7-5.8-13.7-14.1c0-8,5.6-14.2,13.7-14.2c9.1,0,13.6,7.7,13,15.9H163.7z M175.7,113.1c-0.7-3.7-2.3-5.7-5.9-5.7
+						c-4.7,0-6,3.6-6.1,5.7H175.7z"/>
+					<path class="st2" d="M187.2,107.5h-4.4v-4.9h4.4v-2.1c0-4.7,3-8.2,9-8.2c1.3,0,2.6,0.2,3.9,0.2V98c-0.9-0.1-1.8-0.2-2.7-0.2
+						c-2,0-2.8,0.8-2.8,3.1v1.6h5.1v4.9h-5.1v21.9h-7.4V107.5z"/>
+				</g>
+			</g>
+		</g>
+	</g>
+</g>
+</svg>

+ 20 - 0
api/core/tools/provider/builtin/crossref/crossref.py

@@ -0,0 +1,20 @@
+from core.tools.errors import ToolProviderCredentialValidationError
+from core.tools.provider.builtin.crossref.tools.query_doi import CrossRefQueryDOITool
+from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
+
+
+class CrossRefProvider(BuiltinToolProviderController):
+    def _validate_credentials(self, credentials: dict) -> None:
+        try:
+            CrossRefQueryDOITool().fork_tool_runtime(
+                runtime={
+                    "credentials": credentials,
+                }
+            ).invoke(
+                user_id='',
+                tool_parameters={
+                    "doi": '10.1007/s00894-022-05373-8',
+                },
+            )
+        except Exception as e:
+            raise ToolProviderCredentialValidationError(str(e))

+ 29 - 0
api/core/tools/provider/builtin/crossref/crossref.yaml

@@ -0,0 +1,29 @@
+identity:
+  author: Sakura4036
+  name: crossref
+  label:
+    en_US: CrossRef
+    zh_Hans: CrossRef
+  description:
+    en_US: Crossref is a cross-publisher reference linking registration query system using DOI technology created in 2000. Crossref establishes cross-database links between the reference list and citation full text of papers, making it very convenient for readers to access the full text of papers.
+    zh_Hans: Crossref是于2000年创建的使用DOI技术的跨出版商参考文献链接注册查询系统。Crossref建立了在论文的参考文献列表和引文全文之间的跨数据库链接,使得读者能够非常便捷地获取文献全文。
+  icon: icon.svg
+  tags:
+    - search
+credentials_for_provider:
+  mailto:
+    type: text-input
+    required: true
+    label:
+      en_US: email address
+      zh_Hans: email地址
+      pt_BR: email address
+    placeholder:
+      en_US: Please input your email address
+      zh_Hans: 请输入你的email地址
+      pt_BR: Please input your email address
+    help:
+      en_US: According to the requirements of Crossref, an email address is required
+      zh_Hans: 根据Crossref的要求,需要提供一个邮箱地址
+      pt_BR: According to the requirements of Crossref, an email address is required
+    url: https://api.crossref.org/swagger-ui/index.html

+ 25 - 0
api/core/tools/provider/builtin/crossref/tools/query_doi.py

@@ -0,0 +1,25 @@
+from typing import Any, Union
+
+import requests
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.errors import ToolParameterValidationError
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class CrossRefQueryDOITool(BuiltinTool):
+    """
+    Tool for querying the metadata of a publication using its DOI.
+    """
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        doi = tool_parameters.get('doi')
+        if not doi:
+            raise ToolParameterValidationError('doi is required.')
+        # doc: https://github.com/CrossRef/rest-api-doc
+        url = f"https://api.crossref.org/works/{doi}"
+        response = requests.get(url)
+        response.raise_for_status()
+        response = response.json()
+        message = response.get('message', {})
+
+        return self.create_json_message(message)

+ 23 - 0
api/core/tools/provider/builtin/crossref/tools/query_doi.yaml

@@ -0,0 +1,23 @@
+identity:
+  name: crossref_query_doi
+  author: Sakura4036
+  label:
+    en_US: CrossRef Query DOI
+    zh_Hans: CrossRef DOI 查询
+    pt_BR: CrossRef Query DOI
+description:
+  human:
+    en_US: A tool for searching literature information using CrossRef by DOI.
+    zh_Hans: 一个使用CrossRef通过DOI获取文献信息的工具。
+    pt_BR: A tool for searching literature information using CrossRef by DOI.
+  llm: A tool for searching literature information using CrossRef by DOI.
+parameters:
+  - name: doi
+    type: string
+    required: true
+    label:
+      en_US: DOI
+      zh_Hans: DOI
+      pt_BR: DOI
+    llm_description: DOI for searching in CrossRef
+    form: llm

+ 120 - 0
api/core/tools/provider/builtin/crossref/tools/query_title.py

@@ -0,0 +1,120 @@
+import time
+from typing import Any, Union
+
+import requests
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+def convert_time_str_to_seconds(time_str: str) -> int:
+    """
+    Convert a time string to seconds.
+    example: 1s -> 1,  1m30s -> 90, 1h30m -> 5400, 1h30m30s -> 5430
+    """
+    time_str = time_str.lower().strip().replace(' ', '')
+    seconds = 0
+    if 'h' in time_str:
+        hours, time_str = time_str.split('h')
+        seconds += int(hours) * 3600
+    if 'm' in time_str:
+        minutes, time_str = time_str.split('m')
+        seconds += int(minutes) * 60
+    if 's' in time_str:
+        seconds += int(time_str.replace('s', ''))
+    return seconds
+
+
+class CrossRefQueryTitleAPI:
+    """
+    Tool for querying the metadata of a publication using its title.
+    Crossref API doc: https://github.com/CrossRef/rest-api-doc
+    """
+    query_url_template: str = "https://api.crossref.org/works?query.bibliographic={query}&rows={rows}&offset={offset}&sort={sort}&order={order}&mailto={mailto}"
+    rate_limit: int = 50
+    rate_interval: float = 1
+    max_limit: int = 1000
+
+    def __init__(self, mailto: str):
+        self.mailto = mailto
+
+    def _query(self, query: str, rows: int = 5, offset: int = 0, sort: str = 'relevance', order: str = 'desc', fuzzy_query: bool = False) -> list[dict]:
+        """
+        Query the metadata of a publication using its title.
+        :param query: the title of the publication
+        :param rows: the number of results to return
+        :param sort: the sort field
+        :param order: the sort order
+        :param fuzzy_query: whether to return all items that match the query
+        """
+        url = self.query_url_template.format(query=query, rows=rows, offset=offset, sort=sort, order=order, mailto=self.mailto)
+        response = requests.get(url)
+        response.raise_for_status()
+        rate_limit = int(response.headers['x-ratelimit-limit'])
+        # convert time string to seconds
+        rate_interval = convert_time_str_to_seconds(response.headers['x-ratelimit-interval'])
+
+        self.rate_limit = rate_limit
+        self.rate_interval = rate_interval
+
+        response = response.json()
+        if response['status'] != 'ok':
+            return []
+
+        message = response['message']
+        if fuzzy_query:
+            # fuzzy query return all items
+            return message['items']
+        else:
+            for paper in message['items']:
+                title = paper['title'][0]
+                if title.lower() != query.lower():
+                    continue
+                return [paper]
+        return []
+
+    def query(self, query: str, rows: int = 5, sort: str = 'relevance', order: str = 'desc', fuzzy_query: bool = False) -> list[dict]:
+        """
+        Query the metadata of a publication using its title.
+        :param query: the title of the publication
+        :param rows: the number of results to return
+        :param sort: the sort field
+        :param order: the sort order
+        :param fuzzy_query: whether to return all items that match the query
+        """
+        rows = min(rows, self.max_limit)
+        if rows > self.rate_limit:
+            # query multiple times
+            query_times = rows // self.rate_limit + 1
+            results = []
+
+            for i in range(query_times):
+                result = self._query(query, rows=self.rate_limit, offset=i * self.rate_limit, sort=sort, order=order, fuzzy_query=fuzzy_query)
+                if fuzzy_query:
+                    results.extend(result)
+                else:
+                    # fuzzy_query=False, only one result
+                    if result:
+                        return result
+                time.sleep(self.rate_interval)
+            return results
+        else:
+            # query once
+            return self._query(query, rows, sort=sort, order=order, fuzzy_query=fuzzy_query)
+
+
+class CrossRefQueryTitleTool(BuiltinTool):
+    """
+    Tool for querying the metadata of a publication using its title.
+    """
+    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        query = tool_parameters.get('query')
+        fuzzy_query = tool_parameters.get('fuzzy_query', False)
+        rows = tool_parameters.get('rows', 3)
+        sort = tool_parameters.get('sort', 'relevance')
+        order = tool_parameters.get('order', 'desc')
+        mailto = self.runtime.credentials['mailto']
+
+        result = CrossRefQueryTitleAPI(mailto).query(query, rows, sort, order, fuzzy_query)
+
+        return [self.create_json_message(r) for r in result]

+ 105 - 0
api/core/tools/provider/builtin/crossref/tools/query_title.yaml

@@ -0,0 +1,105 @@
+identity:
+  name: crossref_query_title
+  author: Sakura4036
+  label:
+    en_US: CrossRef Title Query
+    zh_Hans: CrossRef 标题查询
+    pt_BR: CrossRef Title Query
+description:
+  human:
+    en_US: A tool for querying literature information using CrossRef by title.
+    zh_Hans: 一个使用CrossRef通过标题搜索文献信息的工具。
+    pt_BR: A tool for querying literature information using CrossRef by title.
+  llm: A tool for querying literature information using CrossRef by title.
+parameters:
+  - name: query
+    type: string
+    required: true
+    label:
+      en_US: Title
+      zh_Hans: 查询语句
+      pt_BR: Title
+    human_description:
+      en_US: Query bibliographic information, useful for citation look up. Includes titles, authors, ISSNs and publication years
+      zh_Hans: 用于搜索文献信息,有助于查找引用。包括标题,作者,ISSN和出版年份
+      pt_BR: Query bibliographic information, useful for citation look up. Includes titles, authors, ISSNs and publication years
+    llm_description: title or bibliographic keywords of the publication to look up in CrossRef
+    form: llm
+  - name: fuzzy_query
+    type: boolean
+    default: false
+    label:
+      en_US: Whether to fuzzy search
+      zh_Hans: 是否模糊搜索
+      pt_BR: Whether to fuzzy search
+    human_description:
+      en_US: used for selecting the query type, fuzzy query returns more results, precise query returns 1 or none
+      zh_Hans: 用于选择搜索类型,模糊搜索返回更多结果,精确搜索返回1条结果或无
+      pt_BR: used for selecting the query type, fuzzy query returns more results, precise query returns 1 or none
+    form: form
+  - name: limit
+    type: number
+    required: false
+    label:
+      en_US: max query number
+      zh_Hans: 最大搜索数
+      pt_BR: max query number
+    human_description:
+      en_US: max query number(fuzzy search returns the maximum number of results or precise search the maximum number of matches)
+      zh_Hans: 最大搜索数(模糊搜索返回的最大结果数或精确搜索最大匹配数)
+      pt_BR: max query number(fuzzy search returns the maximum number of results or precise search the maximum number of matches)
+    form: llm
+    default: 50
+  - name: sort
+    type: select
+    required: true
+    options:
+      - value: relevance
+        label:
+          en_US: relevance
+          zh_Hans: 相关性
+          pt_BR: relevance
+      - value: published
+        label:
+          en_US: publication date
+          zh_Hans: 出版日期
+          pt_BR: publication date
+      - value: references-count
+        label:
+          en_US: references-count
+          zh_Hans: 引用次数
+          pt_BR: references-count
+    default: relevance
+    label:
+      en_US: sorting field
+      zh_Hans: 排序字段
+      pt_BR: sorting field
+    human_description:
+      en_US: Sorting of query results
+      zh_Hans: 检索结果的排序字段
+      pt_BR: Sorting of query results
+    form: form
+  - name: order
+    type: select
+    required: true
+    options:
+      - value: desc
+        label:
+          en_US: descending
+          zh_Hans: 降序
+          pt_BR: descending
+      - value: asc
+        label:
+          en_US: ascending
+          zh_Hans: 升序
+          pt_BR: ascending
+    default: desc
+    label:
+      en_US: Order
+      zh_Hans: 排序
+      pt_BR: Order
+    human_description:
+      en_US: Order of query results
+      zh_Hans: 检索结果的排序方式
+      pt_BR: Order of query results
+    form: form