|
@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
|
|
|
|
|
|
if chunk_json["done"]:
|
|
|
# calculate num tokens
|
|
|
- if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
|
|
|
- # transform usage
|
|
|
+ if "prompt_eval_count" in chunk_json:
|
|
|
prompt_tokens = chunk_json["prompt_eval_count"]
|
|
|
- completion_tokens = chunk_json["eval_count"]
|
|
|
else:
|
|
|
- # calculate num tokens
|
|
|
- prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
|
|
|
- completion_tokens = self._get_num_tokens_by_gpt2(full_text)
|
|
|
+ prompt_message_content = prompt_messages[0].content
|
|
|
+ if isinstance(prompt_message_content, str):
|
|
|
+ prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
|
|
|
+ else:
|
|
|
+ content_text = ""
|
|
|
+ for message_content in prompt_message_content:
|
|
|
+ if message_content.type == PromptMessageContentType.TEXT:
|
|
|
+ message_content = cast(TextPromptMessageContent, message_content)
|
|
|
+ content_text += message_content.data
|
|
|
+ prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
|
|
|
+
|
|
|
+ completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
|
|
|
|
|
|
# transform usage
|
|
|
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|