@@ -239,16 +239,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
 
         def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Generator:
             for chunk in chunks:
-                if not chunk.choices:
-                    continue
-                choice = chunk.choices[0]
-
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
                     delta=LLMResultChunkDelta(
-                        index=choice.index,
-                        message=AssistantPromptMessage(content=choice.delta.content, tool_calls=[]),
+                        index=0,
+                        message=AssistantPromptMessage(
+                            content=chunk.choices[0].delta.content if chunk.choices else "", tool_calls=[]
+                        ),
                         usage=self._calc_response_usage(
                             model=model,
                             credentials=credentials,
@@ -257,7 +255,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
                         )
                         if chunk.usage
                         else None,
-                        finish_reason=choice.finish_reason,
+                        finish_reason=chunk.choices[0].finish_reason if chunk.choices else None,
                     ),
                 )
 
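For context, a minimal sketch of the behavior change (stand-in dataclasses, not the real volcengine SDK types): the old `continue` skipped any chunk whose `choices` list was empty before the usage calculation ever ran, so a trailing usage-only chunk was silently dropped; the new code yields every chunk and falls back to `""` / `None` when `choices` is empty.

```python
# Sketch with hypothetical stand-in types; the real stream yields
# volcengine ChatCompletionChunk objects.
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Usage:
    prompt_tokens: int
    completion_tokens: int


@dataclass
class Chunk:
    choices: list = field(default_factory=list)
    usage: Optional[Usage] = None


stream = [
    Chunk(choices=["Hel"]),
    Chunk(choices=["lo"]),
    # Trailing chunk: empty `choices`, but the only one carrying `usage`.
    Chunk(usage=Usage(prompt_tokens=12, completion_tokens=2)),
]

# Old behavior: `continue` on empty choices drops the usage-bearing chunk.
old = [c for c in stream if c.choices]
assert all(c.usage is None for c in old)

# New behavior: every chunk is yielded; content falls back to "" when
# `choices` is empty, so the trailing usage survives.
new = [(c.choices[0] if c.choices else "", c.usage) for c in stream]
assert new[-1] == ("", stream[-1].usage)
```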