@@ -1,3 +1,4 @@
+import re
 from collections.abc import Generator, Iterator
 from typing import Optional, cast
 
@@ -635,16 +636,16 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
         handle stream chat generate response
         """
         full_response = ""
-
+        is_reasoning_started_tag = False
         for chunk in resp:
             if len(chunk.choices) == 0:
                 continue
-
             delta = chunk.choices[0]
-
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ""):
                 continue
-
+            delta_content = delta.delta.content
+            if not delta_content:
+                delta_content = ""
             # check if there is a tool call in the response
             function_call = None
             tool_calls = []
@@ -657,9 +658,18 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
             if function_call:
                 assistant_message_tool_calls += [self._extract_response_function_call(function_call)]
 
+            if not is_reasoning_started_tag and "<think>" in delta_content:
+                is_reasoning_started_tag = True
+                delta_content = "> 💭 " + delta_content.replace("<think>", "")
+            elif is_reasoning_started_tag and "</think>" in delta_content:
+                delta_content = delta_content.replace("</think>", "") + "\n\n"
+                is_reasoning_started_tag = False
+            elif is_reasoning_started_tag:
+                if "\n" in delta_content:
+                    delta_content = re.sub(r"\n(?!(>|\n))", "\n> ", delta_content)
             # transform assistant message to prompt message
             assistant_prompt_message = AssistantPromptMessage(
-                content=delta.delta.content or "", tool_calls=assistant_message_tool_calls
+                content=delta_content or "", tool_calls=assistant_message_tool_calls
             )
 
             if delta.finish_reason is not None:
@@ -697,7 +707,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
                 ),
             )
 
-            full_response += delta.delta.content
+            full_response += delta_content
 
     def _handle_completion_generate_response(
         self,