
fix: generate summary error when tokens=4097 (#488)

John Wang 1 year ago
parent
commit
b9b0866a46

+ 4 - 1
api/core/generator/llm_generator.py

@@ -45,7 +45,7 @@ class LLMGenerator:
         prompt = CONVERSATION_SUMMARY_PROMPT
         prompt_with_empty_context = prompt.format(context='')
         prompt_tokens = TokenCalculator.get_num_tokens(model, prompt_with_empty_context)
-        rest_tokens = llm_constant.max_context_token_length[model] - prompt_tokens - max_tokens
+        rest_tokens = llm_constant.max_context_token_length[model] - prompt_tokens - max_tokens - 1
 
         context = ''
         for message in messages:
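The one-token adjustment addresses an off-by-one in the token budget: the API counts prompt tokens plus max_tokens against the model's maximum context length, and per the commit title the total could land exactly on 4097 and be rejected. A minimal sketch of the arithmetic, assuming max_context_token_length maps model names to their context windows (the model name and concrete number below are illustrative assumptions):

    # Sketch of the token budget, mirroring the names in the diff above.
    max_context_token_length = {'text-davinci-003': 4097}  # illustrative value

    def remaining_context_budget(model: str, prompt_tokens: int, max_tokens: int) -> int:
        # The completion request must satisfy
        #   prompt tokens + max_tokens <= context window,
        # so one token of headroom keeps a fully packed context from
        # landing exactly on the limit and failing the request.
        return max_context_token_length[model] - prompt_tokens - max_tokens - 1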
@@ -56,6 +56,9 @@ class LLMGenerator:
             if rest_tokens - TokenCalculator.get_num_tokens(model, context + message_qa_text) > 0:
                 context += message_qa_text
 
+        if not context:
+            return '[message too long, no summary]'
+
         prompt = prompt.format(context=context)
 
         llm: StreamableOpenAI = LLMBuilder.to_llm(
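The second hunk guards the case where no message fits the budget at all. The loop packs Q/A texts greedily and skips any that would overflow, so a single oversized message leaves the context empty; the method now returns a placeholder instead of asking the model to summarize nothing. A sketch of that packing behavior, where count_tokens stands in for TokenCalculator.get_num_tokens:

    def build_context(qa_texts, rest_tokens, count_tokens):
        # Greedy packing: append a Q/A text only while it still fits the
        # remaining budget; oversized texts are skipped, not truncated.
        context = ''
        for qa_text in qa_texts:
            if rest_tokens - count_tokens(context + qa_text) > 0:
                context += qa_text
        return context

    # If even the first message exceeds rest_tokens, context stays '' and
    # the caller short-circuits with '[message too long, no summary]'.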

+ 1 - 1
api/tasks/generate_conversation_summary_task.py

@@ -28,7 +28,7 @@ def generate_conversation_summary_task(conversation_id: str):
     try:
         # get conversation messages count
         history_message_count = conversation.message_count
-        if history_message_count >= 5:
+        if history_message_count >= 5 and not conversation.summary:
             app_model = conversation.app
             if not app_model:
                 return
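The task-side change makes summary generation idempotent: a conversation is summarized once it has at least five messages, and never re-summarized if a summary already exists. A minimal sketch of the combined guard, assuming a conversation object with the message_count and summary attributes shown in the diff:

    def should_generate_summary(conversation) -> bool:
        # Enough history, and no summary stored yet: this avoids repeating
        # the LLM call (and overwriting an existing summary) each time the
        # task runs for the same conversation.
        return conversation.message_count >= 5 and not conversation.summary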