
feat: add deepseek support for tongyi (#13445)

Fei He 2 months ago
parent
commit
75113c26c6

+ 4 - 0
api/core/model_runtime/model_providers/tongyi/llm/_position.yaml

@@ -1,3 +1,7 @@
+- deepseek-r1
+- deepseek-r1-distill-qwen-14b
+- deepseek-r1-distill-qwen-32b
+- deepseek-v3
 - qwen-vl-max-0809
 - qwen-vl-max-0201
 - qwen-vl-max

+ 21 - 0
api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-14B.yaml

@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-14b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-14B
+  en_US: DeepSeek-R1-Distill-Qwen-14B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.001"
+  output: "0.003"
+  unit: "0.001"
+  currency: RMB

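For context on the pricing block above: a hedged reading, assuming Dify computes cost as tokens × price × unit, so unit "0.001" effectively quotes the price per 1,000 tokens. A minimal sketch (estimate_cost is illustrative, not a Dify API):

from decimal import Decimal

def estimate_cost(tokens: int, price: str, unit: str) -> Decimal:
    # Assumed Dify-style pricing: cost = tokens * price * unit.
    # With unit "0.001", the quoted price applies per 1,000 tokens.
    return Decimal(tokens) * Decimal(price) * Decimal(unit)

# deepseek-r1-distill-qwen-14b: 1,000 input tokens -> 0.001 RMB
print(estimate_cost(1000, "0.001", "0.001"))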
+ 21 - 0
api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1-distill-qwen-32B.yaml

@@ -0,0 +1,21 @@
+model: deepseek-r1-distill-qwen-32b
+label:
+  zh_Hans: DeepSeek-R1-Distill-Qwen-32B
+  en_US: DeepSeek-R1-Distill-Qwen-32B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.002"
+  output: "0.006"
+  unit: "0.001"
+  currency: RMB

+ 21 - 0
api/core/model_runtime/model_providers/tongyi/llm/deepseek-r1.yaml

@@ -0,0 +1,21 @@
+model: deepseek-r1
+label:
+  zh_Hans: DeepSeek-R1
+  en_US: DeepSeek-R1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 8192
+    default: 4096
+pricing:
+  input: "0.004"
+  output: "0.016"
+  unit: "0.001"
+  currency: RMB

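A note on the two limits in the deepseek-r1 config above: context_size (64000) bounds prompt and completion together, while the max_tokens rule (max 8192) caps only the completion. An illustrative budget calculation, assuming the usual convention that prompt plus completion must fit in the context window (max_completion_budget is a hypothetical helper, not part of the codebase):

def max_completion_budget(prompt_tokens: int, context_size: int = 64000, max_tokens_cap: int = 8192) -> int:
    # The completion is bounded both by the remaining context window
    # and by the max_tokens parameter rule.
    return max(0, min(context_size - prompt_tokens, max_tokens_cap))

print(max_completion_budget(60000))  # 4000 -- window-limited
print(max_completion_budget(1000))   # 8192 -- capped by max_tokens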
+ 52 - 0
api/core/model_runtime/model_providers/tongyi/llm/deepseek-v3.yaml

@@ -0,0 +1,52 @@
+model: deepseek-v3
+label:
+  zh_Hans: DeepSeek-V3
+  en_US: DeepSeek-V3
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 512
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
+      en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: Specify the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: "0.002"
+  output: "0.008"
+  unit: "0.001"
+  currency: RMB

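The parameter_rules above declare ranges and defaults (e.g. max_tokens: min 1, max 4096, default 512) rather than enforcing anything themselves. A minimal sketch of how such a rule could be applied at call time (clamp_param is illustrative, not Dify's actual validator):

def clamp_param(value, rule: dict):
    # Fall back to the rule's default when the caller passes nothing,
    # then clamp into the declared [min, max] range.
    if value is None:
        return rule.get("default")
    if "min" in rule:
        value = max(rule["min"], value)
    if "max" in rule:
        value = min(rule["max"], value)
    return value

max_tokens_rule = {"min": 1, "max": 4096, "default": 512}
print(clamp_param(None, max_tokens_rule))   # 512
print(clamp_param(10000, max_tokens_rule))  # 4096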
+ 9 - 3
api/core/model_runtime/model_providers/tongyi/llm/llm.py

@@ -197,8 +197,7 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
         else:
             # nothing different between chat model and completion model in tongyi
             params["messages"] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
-            response = Generation.call(**params, result_format="message", stream=stream)
-
+            response = Generation.call(**params, result_format="message", stream=stream, incremental_output=True)
         if stream:
             return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
 
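The incremental_output=True change above switches dashscope streaming into delta mode: each chunk carries only the newly generated tokens instead of the full accumulated text. A minimal standalone sketch of such a call, assuming the dashscope SDK and a configured API key (model name and prompt are placeholders):

from dashscope import Generation

responses = Generation.call(
    model="deepseek-r1",
    messages=[{"role": "user", "content": "Hello"}],
    result_format="message",
    stream=True,
    incremental_output=True,  # chunks contain only the new delta
)
for chunk in responses:
    # In delta mode the fragments are concatenated, not replaced.
    print(chunk.output.choices[0].message.content, end="", flush=True)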
@@ -258,6 +257,8 @@
         """
         full_text = ""
         tool_calls = []
+        is_reasoning_started = False
+        index = 0
         for index, response in enumerate(responses):
             if response.status_code not in {200, HTTPStatus.OK}:
                 raise ServiceUnavailableError(
@@ -311,7 +313,11 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
                     ),
                 )
             else:
-                resp_content = response.output.choices[0].message.content
+                message = response.output.choices[0].message
+
+                resp_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
+                    message, is_reasoning_started
+                )
                 if not resp_content:
                     if "tool_calls" in response.output.choices[0].message:
                         tool_calls = response.output.choices[0].message["tool_calls"]
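The _wrap_thinking_by_reasoning_content helper invoked here is not part of this diff. A hedged sketch of what such a helper plausibly does, assuming DeepSeek-R1-style streamed messages expose the chain of thought in a separate reasoning_content field that gets wrapped in <think> tags for downstream rendering (sketched as a plain function; the real code is a method outside this diff):

def _wrap_thinking_by_reasoning_content(message: dict, is_reasoning_started: bool) -> tuple[str, bool]:
    # Sketch only: the actual helper lives outside this diff.
    content = message.get("content") or ""
    reasoning = message.get("reasoning_content") or ""
    if reasoning:
        # Open the <think> block on the first reasoning chunk,
        # then pass later reasoning chunks through unchanged.
        if not is_reasoning_started:
            return "<think>\n" + reasoning, True
        return reasoning, True
    if is_reasoning_started and content:
        # The first normal-content chunk after reasoning closes the block.
        return "\n</think>" + content, False
    return content, is_reasoning_started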