|
@@ -397,16 +397,21 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
chunk_index = 0
|
|
|
|
|
|
def create_final_llm_result_chunk(
|
|
|
- index: int, message: AssistantPromptMessage, finish_reason: str
|
|
|
+ id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
|
|
|
) -> LLMResultChunk:
|
|
|
# calculate num tokens
|
|
|
- prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
|
|
|
- completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
|
|
|
+ prompt_tokens = usage and usage.get("prompt_tokens")
|
|
|
+ if prompt_tokens is None:
|
|
|
+ prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
|
|
|
+ completion_tokens = usage and usage.get("completion_tokens")
|
|
|
+ if completion_tokens is None:
|
|
|
+ completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
|
|
|
|
|
|
# transform usage
|
|
|
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
|
|
|
|
|
return LLMResultChunk(
|
|
|
+ id=id,
|
|
|
model=model,
|
|
|
prompt_messages=prompt_messages,
|
|
|
delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
|
|
@@ -450,7 +455,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
tool_call.function.arguments += new_tool_call.function.arguments
|
|
|
|
|
|
finish_reason = None # The default value of finish_reason is None
|
|
|
-
|
|
|
+ message_id, usage = None, None
|
|
|
for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
|
|
|
chunk = chunk.strip()
|
|
|
if chunk:
|
|
@@ -462,20 +467,26 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
continue
|
|
|
|
|
|
try:
|
|
|
- chunk_json = json.loads(decoded_chunk)
|
|
|
+ chunk_json: dict = json.loads(decoded_chunk)
|
|
|
# stream ended
|
|
|
except json.JSONDecodeError as e:
|
|
|
yield create_final_llm_result_chunk(
|
|
|
+ id=message_id,
|
|
|
index=chunk_index + 1,
|
|
|
message=AssistantPromptMessage(content=""),
|
|
|
finish_reason="Non-JSON encountered.",
|
|
|
+ usage=usage,
|
|
|
)
|
|
|
break
|
|
|
+ if chunk_json:
|
|
|
+ if u := chunk_json.get("usage"):
|
|
|
+ usage = u
|
|
|
if not chunk_json or len(chunk_json["choices"]) == 0:
|
|
|
continue
|
|
|
|
|
|
choice = chunk_json["choices"][0]
|
|
|
finish_reason = chunk_json["choices"][0].get("finish_reason")
|
|
|
+ message_id = chunk_json.get("id")
|
|
|
chunk_index += 1
|
|
|
|
|
|
if "delta" in choice:
|
|
@@ -524,6 +535,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
continue
|
|
|
|
|
|
yield LLMResultChunk(
|
|
|
+ id=message_id,
|
|
|
model=model,
|
|
|
prompt_messages=prompt_messages,
|
|
|
delta=LLMResultChunkDelta(
|
|
@@ -536,6 +548,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
|
|
|
if tools_calls:
|
|
|
yield LLMResultChunk(
|
|
|
+ id=message_id,
|
|
|
model=model,
|
|
|
prompt_messages=prompt_messages,
|
|
|
delta=LLMResultChunkDelta(
|
|
@@ -545,17 +558,22 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
)
|
|
|
|
|
|
yield create_final_llm_result_chunk(
|
|
|
- index=chunk_index, message=AssistantPromptMessage(content=""), finish_reason=finish_reason
|
|
|
+ id=message_id,
|
|
|
+ index=chunk_index,
|
|
|
+ message=AssistantPromptMessage(content=""),
|
|
|
+ finish_reason=finish_reason,
|
|
|
+ usage=usage,
|
|
|
)
|
|
|
|
|
|
def _handle_generate_response(
|
|
|
self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
|
|
|
) -> LLMResult:
|
|
|
- response_json = response.json()
|
|
|
+ response_json: dict = response.json()
|
|
|
|
|
|
completion_type = LLMMode.value_of(credentials["mode"])
|
|
|
|
|
|
output = response_json["choices"][0]
|
|
|
+ message_id = response_json.get("id")
|
|
|
|
|
|
response_content = ""
|
|
|
tool_calls = None
|
|
@@ -593,6 +611,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
|
|
|
|
|
|
# transform response
|
|
|
result = LLMResult(
|
|
|
+ id=message_id,
|
|
|
model=response_json["model"],
|
|
|
prompt_messages=prompt_messages,
|
|
|
message=assistant_message,
|