Parcourir la source

fix: first agent latency (#2334)

Yeuoly il y a 1 an
Parent
commit
3b357f51a6
1 fichier modifié avec 6 ajouts et 1 suppression
  1. 6 1
      api/core/features/assistant_fc_runner.py

+ 6 - 1
api/core/features/assistant_fc_runner.py

@@ -97,7 +97,6 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                 tool_input='',
                 messages_ids=message_file_ids
             )
-            self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
 
             # recale llm max tokens
             self.recale_llm_max_tokens(self.model_config, prompt_messages)
@@ -124,7 +123,11 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
             current_llm_usage = None
 
             if self.stream_tool_call:
+                is_first_chunk = True
                 for chunk in chunks:
+                    if is_first_chunk:
+                        self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+                        is_first_chunk = False
                     # check if there is any tool call
                     if self.check_tool_calls(chunk):
                         function_call_state = True
@@ -183,6 +186,8 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                 if not result.message.content:
                     result.message.content = ''
 
+                self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+                
                 yield LLMResultChunk(
                     model=model_instance.model,
                     prompt_messages=result.prompt_messages,