fix(api/core/model_runtime/model_providers/baichuan,localai): Parse ToolPromptMessage. #4943 (#5138)

Co-authored-by: -LAN- <laipz8200@outlook.com>
yanghx 10 months ago
commit adc948e87c

+ 32 - 14
api/core/model_runtime/model_providers/baichuan/llm/llm.py

@@ -7,6 +7,7 @@ from core.model_runtime.entities.message_entities import (
     PromptMessage,
     PromptMessageTool,
     SystemPromptMessage,
+    ToolPromptMessage,
     UserPromptMessage,
 )
 from core.model_runtime.errors.invoke import (
@@ -32,20 +33,21 @@ from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors impor
 
 
 class BaichuanLarguageModel(LargeLanguageModel):
-    def _invoke(self, model: str, credentials: dict, 
-                prompt_messages: list[PromptMessage], model_parameters: dict, 
-                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None, 
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
                 stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
         return self._generate(model=model, credentials=credentials, prompt_messages=prompt_messages,
-                                model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
+                              model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
 
     def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: list[PromptMessageTool] | None = None) -> int:
         return self._num_tokens_from_messages(prompt_messages)
 
-    def _num_tokens_from_messages(self, messages: list[PromptMessage],) -> int:
+    def _num_tokens_from_messages(self, messages: list[PromptMessage], ) -> int:
         """Calculate num tokens for baichuan model"""
+
         def tokens(text: str):
             return BaichuanTokenizer._get_num_tokens(text)
 
@@ -85,9 +87,20 @@ class BaichuanLarguageModel(LargeLanguageModel):
         elif isinstance(message, SystemPromptMessage):
             message = cast(SystemPromptMessage, message)
             message_dict = {"role": "user", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            # copied from core/model_runtime/model_providers/anthropic/llm/llm.py
+            message = cast(ToolPromptMessage, message)
+            message_dict = {
+                "role": "user",
+                "content": [{
+                    "type": "tool_result",
+                    "tool_use_id": message.tool_call_id,
+                    "content": message.content
+                }]
+            }
         else:
             raise ValueError(f"Unknown message type {type(message)}")
-        
+
         return message_dict
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
@@ -106,13 +119,13 @@ class BaichuanLarguageModel(LargeLanguageModel):
         except Exception as e:
             raise CredentialsValidateFailedError(f"Invalid API key: {e}")
 
-    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], 
-                 model_parameters: dict, tools: list[PromptMessageTool] | None = None, 
-                 stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
+    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                  model_parameters: dict, tools: list[PromptMessageTool] | None = None,
+                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
         if tools is not None and len(tools) > 0:
             raise InvokeBadRequestError("Baichuan model doesn't support tools")
-        
+
         instance = BaichuanModel(
             api_key=credentials['api_key'],
             secret_key=credentials.get('secret_key', '')
@@ -129,11 +142,12 @@ class BaichuanLarguageModel(LargeLanguageModel):
         ]
 
         # invoke model
-        response = instance.generate(model=model, stream=stream, messages=messages, parameters=model_parameters, timeout=60)
+        response = instance.generate(model=model, stream=stream, messages=messages, parameters=model_parameters,
+                                     timeout=60)
 
         if stream:
             return self._handle_chat_generate_stream_response(model, prompt_messages, credentials, response)
-        
+
         return self._handle_chat_generate_response(model, prompt_messages, credentials, response)
 
     def _handle_chat_generate_response(self, model: str,
@@ -141,7 +155,9 @@ class BaichuanLarguageModel(LargeLanguageModel):
                                        credentials: dict,
                                        response: BaichuanMessage) -> LLMResult:
         # convert baichuan message to llm result
-        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=response.usage['prompt_tokens'], completion_tokens=response.usage['completion_tokens'])
+        usage = self._calc_response_usage(model=model, credentials=credentials,
+                                          prompt_tokens=response.usage['prompt_tokens'],
+                                          completion_tokens=response.usage['completion_tokens'])
         return LLMResult(
             model=model,
             prompt_messages=prompt_messages,
@@ -158,7 +174,9 @@ class BaichuanLarguageModel(LargeLanguageModel):
                                               response: Generator[BaichuanMessage, None, None]) -> Generator:
         for message in response:
             if message.usage:
-                usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=message.usage['prompt_tokens'], completion_tokens=message.usage['completion_tokens'])
+                usage = self._calc_response_usage(model=model, credentials=credentials,
+                                                  prompt_tokens=message.usage['prompt_tokens'],
+                                                  completion_tokens=message.usage['completion_tokens'])
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
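
For reference, a minimal sketch of what the new branch does in both files: a ToolPromptMessage is wrapped in an Anthropic-style "tool_result" content block under the "user" role (the in-code comment notes the branch was copied from the anthropic provider). The ToolPromptMessage dataclass below is a simplified stand-in for core.model_runtime.entities.message_entities.ToolPromptMessage, not the real class:

from dataclasses import dataclass

@dataclass
class ToolPromptMessage:
    # simplified stand-in; the real dify entity also carries a role
    # and supports richer content types
    content: str
    tool_call_id: str

def convert_tool_message(message: ToolPromptMessage) -> dict:
    # mirrors the branch added above: the tool output is sent back to the
    # model as a user-role message holding a "tool_result" content block
    return {
        "role": "user",
        "content": [{
            "type": "tool_result",
            "tool_use_id": message.tool_call_id,
            "content": message.content,
        }],
    }

print(convert_tool_message(ToolPromptMessage(content="26.5", tool_call_id="call_0")))
# {'role': 'user', 'content': [{'type': 'tool_result',
#  'tool_use_id': 'call_0', 'content': '26.5'}]}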

+ 56 - 40
api/core/model_runtime/model_providers/localai/llm/llm.py

@@ -27,6 +27,7 @@ from core.model_runtime.entities.message_entities import (
     PromptMessage,
     PromptMessageTool,
     SystemPromptMessage,
+    ToolPromptMessage,
     UserPromptMessage,
 )
 from core.model_runtime.entities.model_entities import (
@@ -51,13 +52,13 @@ from core.model_runtime.utils import helper
 
 
 class LocalAILanguageModel(LargeLanguageModel):
-    def _invoke(self, model: str, credentials: dict, 
-                prompt_messages: list[PromptMessage], model_parameters: dict, 
-                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None, 
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
                 stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
         return self._generate(model=model, credentials=credentials, prompt_messages=prompt_messages,
-                                model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
+                              model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
 
     def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: list[PromptMessageTool] | None = None) -> int:
@@ -67,8 +68,9 @@ class LocalAILanguageModel(LargeLanguageModel):
     def _num_tokens_from_messages(self, messages: list[PromptMessage], tools: list[PromptMessageTool]) -> int:
         """
            Calculate num tokens for LocalAI model
-            LocalAI does not support it 
+            LocalAI does not support it
         """
+
         def tokens(text: str):
             """
                We could not determine which tokenizer to use because the model is customized.
@@ -124,7 +126,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             num_tokens += self._num_tokens_for_tools(tools)
 
         return num_tokens
-    
+
     def _num_tokens_for_tools(self, tools: list[PromptMessageTool]) -> int:
         """
         Calculate num tokens for tool calling
@@ -133,6 +135,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         :param tools: tools for tool calling
         :return: number of tokens
         """
+
         def tokens(text: str):
             return self._get_num_tokens_by_gpt2(text)
 
@@ -193,7 +196,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             completion_model = LLMMode.COMPLETION.value
         else:
             raise ValueError(f"Unknown completion type {credentials['completion_type']}")
-            
+
         rules = [
             ParameterRule(
                 name='temperature',
@@ -227,7 +230,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             )
         ]
 
-        model_properties = { 
+        model_properties = {
             ModelPropertyKey.MODE: completion_model,
         } if completion_model else {}
 
@@ -246,11 +249,11 @@ class LocalAILanguageModel(LargeLanguageModel):
 
         return entity
 
-    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], 
-                 model_parameters: dict, tools: list[PromptMessageTool] | None = None, 
-                 stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
+    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                  model_parameters: dict, tools: list[PromptMessageTool] | None = None,
+                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
-        
+
         kwargs = self._to_client_kwargs(credentials)
         # init model client
         client = OpenAI(**kwargs)
@@ -271,7 +274,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             extra_model_kwargs['functions'] = [
                 helper.dump_model(tool) for tool in tools
             ]
-        
+
         if completion_type == 'chat_completion':
             result = client.chat.completions.create(
                 messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
@@ -294,24 +297,24 @@ class LocalAILanguageModel(LargeLanguageModel):
         if stream:
             if completion_type == 'completion':
                 return self._handle_completion_generate_stream_response(
-                    model=model, credentials=credentials, response=result, tools=tools, 
+                    model=model, credentials=credentials, response=result, tools=tools,
                     prompt_messages=prompt_messages
                 )
             return self._handle_chat_generate_stream_response(
-                model=model, credentials=credentials, response=result, tools=tools, 
+                model=model, credentials=credentials, response=result, tools=tools,
                 prompt_messages=prompt_messages
             )
-        
+
         if completion_type == 'completion':
             return self._handle_completion_generate_response(
-                model=model, credentials=credentials, response=result, 
+                model=model, credentials=credentials, response=result,
                 prompt_messages=prompt_messages
             )
         return self._handle_chat_generate_response(
-            model=model, credentials=credentials, response=result, tools=tools, 
+            model=model, credentials=credentials, response=result, tools=tools,
             prompt_messages=prompt_messages
         )
-    
+
     def _to_client_kwargs(self, credentials: dict) -> dict:
         """
         Convert invoke kwargs to client kwargs
@@ -321,7 +324,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         """
         if not credentials['server_url'].endswith('/'):
             credentials['server_url'] += '/'
-            
+
         client_kwargs = {
             "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
             "api_key": "1",
@@ -351,9 +354,20 @@ class LocalAILanguageModel(LargeLanguageModel):
         elif isinstance(message, SystemPromptMessage):
             message = cast(SystemPromptMessage, message)
             message_dict = {"role": "system", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            # copied from core/model_runtime/model_providers/anthropic/llm/llm.py
+            message = cast(ToolPromptMessage, message)
+            message_dict = {
+                "role": "user",
+                "content": [{
+                    "type": "tool_result",
+                    "tool_use_id": message.tool_call_id,
+                    "content": message.content
+                }]
+            }
         else:
             raise ValueError(f"Unknown message type {type(message)}")
-        
+
         return message_dict
 
     def _convert_prompt_message_to_completion_prompts(self, messages: list[PromptMessage]) -> str:
@@ -373,14 +387,14 @@ class LocalAILanguageModel(LargeLanguageModel):
                 prompts += f'{message.content}\n'
             else:
                 raise ValueError(f"Unknown message type {type(message)}")
-        
+
         return prompts
 
     def _handle_completion_generate_response(self, model: str,
-                                       prompt_messages: list[PromptMessage],
-                                       credentials: dict,
-                                       response: Completion,
-                                       ) -> LLMResult:
+                                             prompt_messages: list[PromptMessage],
+                                             credentials: dict,
+                                             response: Completion,
+                                             ) -> LLMResult:
         """
         Handle llm chat response
 
@@ -393,7 +407,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         """
         if len(response.choices) == 0:
             raise InvokeServerUnavailableError("Empty response")
-        
+
         assistant_message = response.choices[0].text
 
         # transform assistant message to prompt message
@@ -407,7 +421,8 @@ class LocalAILanguageModel(LargeLanguageModel):
         )
         completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=[])
 
-        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens,
+                                          completion_tokens=completion_tokens)
 
         response = LLMResult(
             model=model,
@@ -436,7 +451,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         """
         if len(response.choices) == 0:
             raise InvokeServerUnavailableError("Empty response")
-        
+
         assistant_message = response.choices[0].message
 
         # convert function call to tool call
@@ -452,7 +467,8 @@ class LocalAILanguageModel(LargeLanguageModel):
         prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
         completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=tools)
 
-        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens,
+                                          completion_tokens=completion_tokens)
 
         response = LLMResult(
             model=model,
@@ -465,10 +481,10 @@ class LocalAILanguageModel(LargeLanguageModel):
         return response
 
     def _handle_completion_generate_stream_response(self, model: str,
-                                              prompt_messages: list[PromptMessage],
-                                              credentials: dict,
-                                              response: Stream[Completion],
-                                              tools: list[PromptMessageTool]) -> Generator:
+                                                    prompt_messages: list[PromptMessage],
+                                                    credentials: dict,
+                                                    response: Stream[Completion],
+                                                    tools: list[PromptMessageTool]) -> Generator:
         full_response = ''
 
         for chunk in response:
@@ -496,9 +512,9 @@ class LocalAILanguageModel(LargeLanguageModel):
 
                 completion_tokens = self._num_tokens_from_messages(messages=[temp_assistant_prompt_message], tools=[])
 
-                usage = self._calc_response_usage(model=model, credentials=credentials, 
+                usage = self._calc_response_usage(model=model, credentials=credentials,
                                                   prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
-                
+
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
@@ -538,7 +554,7 @@ class LocalAILanguageModel(LargeLanguageModel):
 
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ''):
                 continue
-            
+
             # check if there is a tool call in the response
             function_calls = None
             if delta.delta.function_call:
@@ -562,9 +578,9 @@ class LocalAILanguageModel(LargeLanguageModel):
                 prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
                 completion_tokens = self._num_tokens_from_messages(messages=[temp_assistant_prompt_message], tools=[])
 
-                usage = self._calc_response_usage(model=model, credentials=credentials, 
+                usage = self._calc_response_usage(model=model, credentials=credentials,
                                                   prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
-                
+
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
@@ -613,7 +629,7 @@ class LocalAILanguageModel(LargeLanguageModel):
                 )
                 tool_calls.append(tool_call)
 
-        return tool_calls 
+        return tool_calls
 
     @property
     def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
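
Why the branch was needed: before this commit, _convert_prompt_message_to_dict in both providers fell through to the final else whenever an agent run fed a tool result back into the model, raising ValueError("Unknown message type ...") — the failure reported in #4943. A hedged sketch of that dispatch, again with simplified stand-ins for the dify message entities:

from dataclasses import dataclass

@dataclass
class UserPromptMessage:
    content: str

@dataclass
class ToolPromptMessage:
    content: str
    tool_call_id: str

def convert(message) -> dict:
    if isinstance(message, UserPromptMessage):
        return {"role": "user", "content": message.content}
    if isinstance(message, ToolPromptMessage):
        # the branch this commit adds; removing it reproduces #4943
        return {"role": "user",
                "content": [{"type": "tool_result",
                             "tool_use_id": message.tool_call_id,
                             "content": message.content}]}
    raise ValueError(f"Unknown message type {type(message)}")

history = [UserPromptMessage(content="What is the weather?"),
           ToolPromptMessage(content="26.5", tool_call_id="call_0")]
print([convert(m) for m in history])

One design note: the mapping reuses Anthropic's "tool_result" wire format even though LocalAI speaks an OpenAI-compatible API; whether a given backing server accepts that shape depends on the deployment and is not verified by this sketch.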