@@ -9,7 +9,13 @@ from typing import Optional, Union
 from core.model_runtime.callbacks.base_callback import Callback
 from core.model_runtime.callbacks.logging_callback import LoggingCallback
 from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
-from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage, PromptMessageTool
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+    UserPromptMessage,
+)
 from core.model_runtime.entities.model_entities import (
     ModelPropertyKey,
     ModelType,
@@ -74,7 +80,20 @@ class LargeLanguageModel(AIModel):
         )
 
         try:
-            result = self._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
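+            # "response_format" signals that the caller wants structured output in a fenced code block,
+            # so route the call through the code block mode wrapper rather than invoking the model directly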
+            if "response_format" in model_parameters:
+                result = self._code_block_mode_wrapper(
+                    model=model,
+                    credentials=credentials,
+                    prompt_messages=prompt_messages,
+                    model_parameters=model_parameters,
+                    tools=tools,
+                    stop=stop,
+                    stream=stream,
+                    user=user,
+                    callbacks=callbacks
+                )
+            else:
+                result = self._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
         except Exception as e:
             self._trigger_invoke_error_callbacks(
                 model=model,
@@ -120,6 +139,239 @@ class LargeLanguageModel(AIModel):
 
         return result
 
+    def _code_block_mode_wrapper(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                                 model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None,
+                                 stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None,
+                                 callbacks: Optional[list[Callback]] = None) -> Union[LLMResult, Generator]:
+        """
+        Code block mode wrapper, ensuring the response is returned as a markdown code block
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param model_parameters: model parameters
+        :param tools: tools for tool calling
+        :param stop: stop words
+        :param stream: is stream response
+        :param user: unique user id
+        :param callbacks: callbacks
+        :return: full response or stream response chunk generator result
+        """
+
+ block_prompts = """You should always follow the instructions and output a valid {{block}} object.
|
|
|
+The structure of the {{block}} object you can found in the instructions, use {"answer": "$your_answer"} as the default structure
|
|
|
+if you are not sure about the structure.
|
|
|
+
|
|
|
+<instructions>
|
|
|
+{{instructions}}
|
|
|
+</instructions>
|
|
|
+"""
|
|
|
+
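+        # "response_format" names the desired block type (e.g. "JSON"); fall back to a plain invoke when absent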
+        code_block = model_parameters.get("response_format", "")
+        if not code_block:
+            return self._invoke(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                tools=tools,
+                stop=stop,
+                stream=stream,
+                user=user
+            )
+
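+        # Consume the flag and treat closing fences as stop sequences so the model halts after the block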
+        model_parameters.pop("response_format")
+        stop = stop or []
+        stop.extend(["\n```", "```\n"])
+        block_prompts = block_prompts.replace("{{block}}", code_block)
+
+        # check if there is a system message
+        if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
+            # override the system message
+            prompt_messages[0] = SystemPromptMessage(
+                content=block_prompts
+                .replace("{{instructions}}", prompt_messages[0].content)
+            )
+        else:
+            # insert the system message
+            prompt_messages.insert(0, SystemPromptMessage(
+                content=block_prompts
+                .replace("{{instructions}}", f"Please output a valid {code_block} object.")
+            ))
+
+        if len(prompt_messages) > 0 and isinstance(prompt_messages[-1], UserPromptMessage):
+            # add an opening ```{code_block} fence to the last user message
+            prompt_messages[-1].content += f"\n```{code_block}\n"
+        else:
+            # append a user message that opens the code block
+            prompt_messages.append(UserPromptMessage(
+                content=f"```{code_block}\n"
+            ))
+
+        response = self._invoke(
+            model=model,
+            credentials=credentials,
+            prompt_messages=prompt_messages,
+            model_parameters=model_parameters,
+            tools=tools,
+            stop=stop,
+            stream=stream,
+            user=user
+        )
+
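+        # For streaming responses, peek at the first chunk: if the model already opened a fence,
+        # use the processor variant that also strips the opening backticks and language tag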
+        if isinstance(response, Generator):
+            first_chunk = next(response)
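+            # Re-attach the consumed first chunk so the stream processors see the complete stream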
+            def new_generator():
+                yield first_chunk
+                yield from response
+
+            if first_chunk.delta.message.content and first_chunk.delta.message.content.startswith("`"):
+                return self._code_block_mode_stream_processor_with_backtick(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    input_generator=new_generator()
+                )
+            else:
+                return self._code_block_mode_stream_processor(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    input_generator=new_generator()
+                )
+
+        return response
+
+    def _code_block_mode_stream_processor(self, model: str, prompt_messages: list[PromptMessage],
+                                          input_generator: Generator[LLMResultChunk, None, None]
+                                          ) -> Generator[LLMResultChunk, None, None]:
+ """
|
|
|
+ Code block mode stream processor, ensure the response is a code block with output markdown quote
|
|
|
+
|
|
|
+ :param model: model name
|
|
|
+ :param prompt_messages: prompt messages
|
|
|
+ :param input_generator: input generator
|
|
|
+ :return: output generator
|
|
|
+ """
+        state = "normal"
+        backtick_count = 0
+        for piece in input_generator:
+            if piece.delta.message.content:
+                content = piece.delta.message.content
+                piece.delta.message.content = ""
+                yield piece
+                piece = content
+            else:
+                yield piece
+                continue
+            new_piece = ""
+            for char in piece:
+                if state == "normal":
+                    if char == "`":
+                        state = "in_backticks"
+                        backtick_count = 1
+                    else:
+                        new_piece += char
+                elif state == "in_backticks":
+                    if char == "`":
+                        backtick_count += 1
+                        if backtick_count == 3:
+                            state = "skip_content"
+                            backtick_count = 0
+                    else:
+                        new_piece += "`" * backtick_count + char
+                        state = "normal"
+                        backtick_count = 0
+                elif state == "skip_content":
+                    if char.isspace():
+                        state = "normal"
+
+            if new_piece:
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=0,
+                        message=AssistantPromptMessage(
+                            content=new_piece,
+                            tool_calls=[]
+                        ),
+                    )
+                )
+
+    def _code_block_mode_stream_processor_with_backtick(self, model: str, prompt_messages: list[PromptMessage],
+                                                         input_generator: Generator[LLMResultChunk, None, None]) \
+        -> Generator[LLMResultChunk, None, None]:
+        """
+        Code block mode stream processor, ensuring the response is returned as a markdown code block.
+        This version skips the language identifier that follows the opening triple backticks.
+
+        :param model: model name
+        :param prompt_messages: prompt messages
+        :param input_generator: input generator
+        :return: output generator
+        """
+        state = "search_start"
+        backtick_count = 0
+
+        for piece in input_generator:
+            if piece.delta.message.content:
+                content = piece.delta.message.content
+                # Reset content to ensure we're only processing and yielding the relevant parts
+                piece.delta.message.content = ""
+                # Yield a piece with cleared content before processing it to maintain the generator structure
+                yield piece
+                piece = content
+            else:
+                # Yield pieces without content directly
+                yield piece
+                continue
+
+            if state == "done":
+                continue
+
+            new_piece = ""
+            for char in piece:
+                if state == "search_start":
+                    if char == "`":
+                        backtick_count += 1
+                        if backtick_count == 3:
+                            state = "skip_language"
+                            backtick_count = 0
+                    else:
+                        backtick_count = 0
+                elif state == "skip_language":
+                    # Skip everything until the first newline, marking the end of the language identifier
+                    if char == "\n":
+                        state = "in_code_block"
+                elif state == "in_code_block":
+                    if char == "`":
+                        backtick_count += 1
+                        if backtick_count == 3:
+                            state = "done"
+                            break
+                    else:
+                        if backtick_count > 0:
+                            # If backticks were counted but we're still collecting content, it was a false start
+                            new_piece += "`" * backtick_count
+                            backtick_count = 0
+                        new_piece += char
+
+                elif state == "done":
+                    break
+
+            if new_piece:
+                # Only yield content collected within the code block
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=0,
+                        message=AssistantPromptMessage(
+                            content=new_piece,
+                            tool_calls=[]
+                        ),
+                    )
+                )
+
     def _invoke_result_generator(self, model: str, result: Generator, credentials: dict,
                                  prompt_messages: list[PromptMessage], model_parameters: dict,
                                  tools: Optional[list[PromptMessageTool]] = None,
@@ -204,7 +456,7 @@ class LargeLanguageModel(AIModel):
         :return: full response or stream response chunk generator result
         """
         raise NotImplementedError
-
+
     @abstractmethod
     def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: Optional[list[PromptMessageTool]] = None) -> int: