
Fix Claude request errors in Bedrock (#3015)

Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: crazywoola <427733928@qq.com>
Chenhe Gu · 1 year ago
commit 9d962053a2

+ 4 - 2
api/core/model_runtime/model_providers/bedrock/bedrock.py

@@ -17,9 +17,11 @@ class BedrockProvider(ModelProvider):
         """
         try:
             model_instance = self.get_model_instance(ModelType.LLM)
-            bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
+
+            # Use the `amazon.titan-text-lite-v1` model by default when validating credentials
+            model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
             model_instance.validate_credentials(
-                model=bedrock_validate_model_name,
+                model=model_for_validation,
                 credentials=credentials
             )
         except CredentialsValidateFailedError as ex:
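
For context: the hunk above makes the provider fall back to `amazon.titan-text-lite-v1` whenever the optional `model_for_validation` credential is absent. A minimal, standalone sketch of that fallback, assuming a `model_instance` that exposes `validate_credentials(model, credentials)` as in the diff (credential keys and exception class below are placeholders):

```python
# Standalone sketch of the fallback used in validate_provider_credentials.
# `model_instance` is assumed to expose validate_credentials(model, credentials);
# the exception class here is a stand-in for the project's own.
class CredentialsValidateFailedError(Exception):
    pass


def validate_provider_credentials(model_instance, credentials: dict) -> None:
    # Fall back to a cheap, widely available model when the user did not
    # name one explicitly; 'model_for_validation' is optional in the schema.
    model_for_validation = credentials.get(
        "model_for_validation", "amazon.titan-text-lite-v1"
    )
    try:
        model_instance.validate_credentials(
            model=model_for_validation,
            credentials=credentials,
        )
    except CredentialsValidateFailedError:
        raise
    except Exception as ex:
        # Surface any other failure as a credential-validation error.
        raise CredentialsValidateFailedError(str(ex)) from ex
```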

+ 1 - 1
api/core/model_runtime/model_providers/bedrock/bedrock.yaml

@@ -74,7 +74,7 @@ provider_credential_schema:
       label:
         en_US: Available Model Name
         zh_Hans: 可用模型名称
-      type: text-input
+      type: secret-input
       placeholder:
         en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
         zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1)

+ 34 - 17
api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-instant-v1.yaml

@@ -1,33 +1,50 @@
 model: anthropic.claude-instant-v1
 label:
-  en_US: Claude Instant V1
+  en_US: Claude Instant 1
 model_type: llm
 model_properties:
   mode: chat
   context_size: 100000
 parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
   - name: temperature
     use_template: temperature
-  - name: topP
-    use_template: top_p
-  - name: topK
-    label:
-      zh_Hans: 取样数量
-      en_US: Top K
-    type: int
+    required: false
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
     help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
+      zh_Hans: 生成内容的随机性。
+      en_US: The amount of randomness injected into the response.
+  - name: top_p
     required: false
-    default: 250
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
+    help:
+      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+  - name: top_k
+    required: false
+    type: int
+    default: 0
     min: 0
+    # Note: the AWS docs list this maximum incorrectly; the actual max value is 500
     max: 500
-  - name: max_tokens_to_sample
-    use_template: max_tokens
-    required: true
-    default: 4096
-    min: 1
-    max: 4096
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
 pricing:
   input: '0.0008'
   output: '0.0024'
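
The rules above replace the legacy Text Completions parameters (`max_tokens_to_sample`, `topP`, `topK`) with the Anthropic Messages API names. As a hedged sketch of how those defaults could map onto a request through the Anthropic SDK's Bedrock client (the `AnthropicBedrock` constructor arguments and all credential values are illustrative assumptions, not part of this commit):

```python
# Illustrative only: maps the parameter rules above onto an Anthropic
# Messages API call routed through Bedrock. Credentials are placeholders.
from anthropic import AnthropicBedrock

client = AnthropicBedrock(
    aws_access_key="AKIA...",   # placeholder
    aws_secret_key="...",       # placeholder
    aws_region="us-east-1",
)

response = client.messages.create(
    model="anthropic.claude-instant-v1",
    max_tokens=4096,   # required; 1..4096 per the rule above
    temperature=1.0,   # optional; 0.0..1.0
    # The help text advises changing either temperature or top_p, not both.
    top_p=0.999,       # optional; 0.0..1.0
    top_k=0,           # optional; 0..500
    messages=[{"role": "user", "content": "ping"}],
)
print(response.content[0].text)
```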

+ 33 - 16
api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v1.yaml

@@ -1,33 +1,50 @@
 model: anthropic.claude-v1
 label:
-  en_US: Claude V1
+  en_US: Claude 1
 model_type: llm
 model_properties:
   mode: chat
   context_size: 100000
 parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
   - name: temperature
     use_template: temperature
+    required: false
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。
+      en_US: The amount of randomness injected into the response.
   - name: top_p
-    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top K
-    type: int
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
     help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
+      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+  - name: top_k
     required: false
-    default: 250
+    type: int
+    default: 0
     min: 0
+    # Note: the AWS docs list this maximum incorrectly; the actual max value is 500
     max: 500
-  - name: max_tokens_to_sample
-    use_template: max_tokens
-    required: true
-    default: 4096
-    min: 1
-    max: 4096
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
 pricing:
   input: '0.008'
   output: '0.024'

+ 33 - 16
api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.1.yaml

@@ -1,33 +1,50 @@
 model: anthropic.claude-v2:1
 label:
-  en_US: Claude V2.1
+  en_US: Claude 2.1
 model_type: llm
 model_properties:
   mode: chat
   context_size: 200000
 parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
   - name: temperature
     use_template: temperature
+    required: false
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。
+      en_US: The amount of randomness injected into the response.
   - name: top_p
-    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top K
-    type: int
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
     help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
+      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+  - name: top_k
     required: false
-    default: 250
+    type: int
+    default: 0
     min: 0
+    # Note: the AWS docs list this maximum incorrectly; the actual max value is 500
     max: 500
-  - name: max_tokens_to_sample
-    use_template: max_tokens
-    required: true
-    default: 4096
-    min: 1
-    max: 4096
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
 pricing:
   input: '0.008'
   output: '0.024'

+ 33 - 16
api/core/model_runtime/model_providers/bedrock/llm/anthropic.claude-v2.yaml

@@ -1,33 +1,50 @@
 model: anthropic.claude-v2
 label:
-  en_US: Claude V2
+  en_US: Claude 2
 model_type: llm
 model_properties:
   mode: chat
   context_size: 100000
 parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
   - name: temperature
     use_template: temperature
+    required: false
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。
+      en_US: The amount of randomness injected into the response.
   - name: top_p
-    use_template: top_p
-  - name: top_k
-    label:
-      zh_Hans: 取样数量
-      en_US: Top K
-    type: int
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
     help:
-      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
-      en_US: Only sample from the top K options for each subsequent token.
+      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+  - name: top_k
     required: false
-    default: 250
+    type: int
+    default: 0
     min: 0
+    # Note: the AWS docs list this maximum incorrectly; the actual max value is 500
     max: 500
-  - name: max_tokens_to_sample
-    use_template: max_tokens
-    required: true
-    default: 4096
-    min: 1
-    max: 4096
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
 pricing:
   input: '0.008'
   output: '0.024'

+ 16 - 16
api/core/model_runtime/model_providers/bedrock/llm/llm.py

@@ -72,16 +72,16 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
         :return: full response or stream response chunk generator result
         """
 
-        # invoke claude 3 models via anthropic official SDK
-        if "anthropic.claude-3" in model:
-            return self._invoke_claude3(model, credentials, prompt_messages, model_parameters, stop, stream, user)
-        # invoke model
+        # invoke anthropic models via anthropic official SDK
+        if "anthropic" in model:
+            return self._generate_anthropic(model, credentials, prompt_messages, model_parameters, stop, stream, user)
+        # invoke other models via boto3 client
         return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)
 
-    def _invoke_claude3(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
+    def _generate_anthropic(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
                 stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
         """
-        Invoke Claude3 large language model
+        Invoke Anthropic large language model
 
         :param model: model name
         :param credentials: model credentials
@@ -114,7 +114,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
             # ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465
             # extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user)
 
-        system, prompt_message_dicts = self._convert_claude3_prompt_messages(prompt_messages)
+        system, prompt_message_dicts = self._convert_claude_prompt_messages(prompt_messages)
 
         if system:
             extra_model_kwargs['system'] = system
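
The call above splits the prompt list into a separate `system` string and a list of Anthropic-style message dicts. A hedged sketch of what such a conversion typically does (the classes and helper below are illustrative stand-ins, not the project's actual `_convert_claude_prompt_messages`, which also handles multimodal content):

```python
# Illustrative stand-ins for the project's PromptMessage classes.
from dataclasses import dataclass


@dataclass
class PromptMessage:
    content: str


class SystemPromptMessage(PromptMessage):
    pass


class UserPromptMessage(PromptMessage):
    pass


class AssistantPromptMessage(PromptMessage):
    pass


def convert_claude_prompt_messages(prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
    # The Messages API takes the system prompt as its own field, so collect
    # SystemPromptMessage contents rather than leaving them in the message list.
    system = "\n".join(
        m.content for m in prompt_messages if isinstance(m, SystemPromptMessage)
    )
    prompt_message_dicts = [
        {
            "role": "assistant" if isinstance(m, AssistantPromptMessage) else "user",
            "content": m.content,
        }
        for m in prompt_messages
        if not isinstance(m, SystemPromptMessage)
    ]
    return system, prompt_message_dicts
```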
@@ -128,11 +128,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
         )
 
         if stream:
-            return self._handle_claude3_stream_response(model, credentials, response, prompt_messages)
+            return self._handle_claude_stream_response(model, credentials, response, prompt_messages)
 
-        return self._handle_claude3_response(model, credentials, response, prompt_messages)
+        return self._handle_claude_response(model, credentials, response, prompt_messages)
 
-    def _handle_claude3_response(self, model: str, credentials: dict, response: Message,
+    def _handle_claude_response(self, model: str, credentials: dict, response: Message,
                                 prompt_messages: list[PromptMessage]) -> LLMResult:
         """
         Handle llm chat response
@@ -172,7 +172,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
 
         return response
 
-    def _handle_claude3_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
+    def _handle_claude_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
                                         prompt_messages: list[PromptMessage], ) -> Generator:
         """
         Handle llm chat stream response
@@ -231,7 +231,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
         except Exception as ex:
             raise InvokeError(str(ex))
 
-    def _calc_claude3_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
+    def _calc_claude_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
         """
         Calculate response usage
 
@@ -275,7 +275,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
 
         return usage
 
-    def _convert_claude3_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
+    def _convert_claude_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
         """
         Convert prompt messages to dict list and system
         """
@@ -295,11 +295,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
         prompt_message_dicts = []
         for message in prompt_messages:
             if not isinstance(message, SystemPromptMessage):
-                prompt_message_dicts.append(self._convert_claude3_prompt_message_to_dict(message))
+                prompt_message_dicts.append(self._convert_claude_prompt_message_to_dict(message))
 
         return system, prompt_message_dicts
 
-    def _convert_claude3_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+    def _convert_claude_prompt_message_to_dict(self, message: PromptMessage) -> dict:
         """
         Convert PromptMessage to dict
         """
@@ -405,7 +405,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
 
         if "anthropic.claude-3" in model:
             try:
-                self._invoke_claude3(model=model,
+                self._invoke_claude(model=model,
                                         credentials=credentials,
                                         prompt_messages=[{"role": "user", "content": "ping"}],
                                         model_parameters={},
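
Taken together, the renames above come down to one dispatch rule: any Bedrock model id containing "anthropic" is now sent through the Anthropic SDK path, while every other model keeps the boto3 invoke path. A stub-level sketch of that routing (only the condition mirrors the diff; the method bodies are placeholders):

```python
from typing import Optional


# Stub-level sketch of the dispatch in _invoke; only the routing condition
# reflects the diff, the generator/SDK details are omitted.
class BedrockRoutingSketch:
    def _invoke(self, model: str, credentials: dict, prompt_messages: list,
                model_parameters: dict, stop: Optional[list[str]] = None,
                stream: bool = True, user: Optional[str] = None):
        # Claude 1, 2, Instant and 3 all share the Anthropic-SDK path now,
        # instead of only the claude-3 family.
        if "anthropic" in model:
            return self._generate_anthropic(model, credentials, prompt_messages,
                                            model_parameters, stop, stream, user)
        # Non-Anthropic models keep the boto3 invoke_model path.
        return self._generate(model, credentials, prompt_messages,
                              model_parameters, stop, stream, user)

    def _generate_anthropic(self, *args):
        raise NotImplementedError  # placeholder for the SDK-based path

    def _generate(self, *args):
        raise NotImplementedError  # placeholder for the boto3 path
```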