
chore:improve thinking display for llm from xinference and ollama pro… (#13318)

呆萌闷油瓶, 2 months ago
Parent commit: 3f42fabff8
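
This commit replaces the duplicated inline <think>-tag handling in the Ollama and Xinference LLM runtimes with a single shared helper, _wrap_thinking_by_tag, inherited from the LargeLanguageModel base class.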

+ 1 - 9
api/core/model_runtime/model_providers/ollama/llm/llm.py

@@ -314,7 +314,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
         """
         full_text = ""
         chunk_index = 0
-        is_reasoning_started = False
 
         def create_final_llm_result_chunk(
             index: int, message: AssistantPromptMessage, finish_reason: str
@@ -368,14 +367,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
 
                 # transform assistant message to prompt message
                 text = chunk_json["response"]
-            if "<think>" in text:
-                is_reasoning_started = True
-                text = text.replace("<think>", "> 💭 ")
-            elif "</think>" in text:
-                is_reasoning_started = False
-                text = text.replace("</think>", "") + "\n\n"
-            elif is_reasoning_started:
-                text = text.replace("\n", "\n> ")
+            text = self._wrap_thinking_by_tag(text)
 
             assistant_prompt_message = AssistantPromptMessage(content=text)
 

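The branch removed above (and its Xinference counterpart in the next file) shows what the shared helper must do. A minimal sketch, assuming the base class keeps the reasoning flag as instance state (the _is_thinking attribute is hypothetical, and the real _wrap_thinking_by_tag in the LargeLanguageModel base class may differ):

import re


class LargeLanguageModel:
    # Hypothetical per-instance flag; the real base class may track
    # reasoning state differently.
    _is_thinking: bool = False

    def _wrap_thinking_by_tag(self, text: str) -> str:
        """Render a streamed <think>...</think> span as a Markdown quote.

        Merges the two inline versions this commit removes: the opening
        tag starts a "> 💭 " quote block, the closing tag ends it, and
        newlines emitted while thinking are prefixed with "> " so the
        reasoning trace stays inside the quote block.
        """
        if "<think>" in text:
            self._is_thinking = True
            text = text.replace("<think>", "> 💭 ")
        if "</think>" in text:
            self._is_thinking = False
            text = text.replace("</think>", "") + "\n\n"
        elif self._is_thinking:
            # Negative lookahead avoids double-quoting lines that already
            # start with ">" (taken from the Xinference variant below).
            text = re.sub(r"\n(?!(>|\n))", "\n> ", text)
        return text
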
+ 2 - 14
api/core/model_runtime/model_providers/xinference/llm/llm.py

@@ -1,4 +1,3 @@
-import re
 from collections.abc import Generator, Iterator
 from typing import Optional, cast
 
@@ -636,16 +635,13 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
         handle stream chat generate response
         """
         full_response = ""
-        is_reasoning_started_tag = False
         for chunk in resp:
             if len(chunk.choices) == 0:
                 continue
             delta = chunk.choices[0]
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
                 continue
-            delta_content = delta.delta.content
-            if not delta_content:
-                delta_content = ""
+            delta_content = delta.delta.content or ""
             # check if there is a tool call in the response
             function_call = None
             tool_calls = []
@@ -658,15 +654,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
             if function_call:
                 assistant_message_tool_calls += [self._extract_response_function_call(function_call)]
 
-            if not is_reasoning_started_tag and "<think>" in delta_content:
-                is_reasoning_started_tag = True
-                delta_content = "> 💭 " + delta_content.replace("<think>", "")
-            elif is_reasoning_started_tag and "</think>" in delta_content:
-                delta_content = delta_content.replace("</think>", "") + "\n\n"
-                is_reasoning_started_tag = False
-            elif is_reasoning_started_tag:
-                if "\n" in delta_content:
-                    delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
+            delta_content = self._wrap_thinking_by_tag(delta_content)
             # transform assistant message to prompt message
             assistant_prompt_message = AssistantPromptMessage(
                 content=delta_content or "", tool_calls=assistant_message_tool_calls
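
Assuming the sketch above, streaming a few deltas through the helper shows the intended rendering (chunk boundaries here are illustrative):

model = LargeLanguageModel()
deltas = ["<think>", "Check the docs.\n", "Looks fine.", "</think>", "Final answer."]
print("".join(model._wrap_thinking_by_tag(d) for d in deltas))
# > 💭 Check the docs.
# > Looks fine.
#
# Final answer.

Keeping the flag on the model instance is what lets both providers drop their local is_reasoning_started / is_reasoning_started_tag variables, and lets the Xinference module drop its import of re.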