
feat: add cohere llm and embedding (#2115)

takatost, 1 year ago
parent commit: a18dde9b0d
27 files changed, 1689 additions and 3 deletions
  1. + 5 - 0    api/core/model_runtime/model_providers/__base/large_language_model.py
  2. + 43 - 2   api/core/model_runtime/model_providers/cohere/cohere.yaml
  3. + 0 - 0    api/core/model_runtime/model_providers/cohere/llm/__init__.py
  4. + 8 - 0    api/core/model_runtime/model_providers/cohere/llm/_position.yaml
  5. + 62 - 0   api/core/model_runtime/model_providers/cohere/llm/command-chat.yaml
  6. + 62 - 0   api/core/model_runtime/model_providers/cohere/llm/command-light-chat.yaml
  7. + 62 - 0   api/core/model_runtime/model_providers/cohere/llm/command-light-nightly-chat.yaml
  8. + 44 - 0   api/core/model_runtime/model_providers/cohere/llm/command-light-nightly.yaml
  9. + 44 - 0   api/core/model_runtime/model_providers/cohere/llm/command-light.yaml
  10. + 62 - 0  api/core/model_runtime/model_providers/cohere/llm/command-nightly-chat.yaml
  11. + 44 - 0  api/core/model_runtime/model_providers/cohere/llm/command-nightly.yaml
  12. + 44 - 0  api/core/model_runtime/model_providers/cohere/llm/command.yaml
  13. + 565 - 0 api/core/model_runtime/model_providers/cohere/llm/llm.py
  14. + 0 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/__init__.py
  15. + 7 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/_position.yaml
  16. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-light-v2.0.yaml
  17. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-light-v3.0.yaml
  18. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-v2.0.yaml
  19. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-v3.0.yaml
  20. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-multilingual-light-v3.0.yaml
  21. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-multilingual-v2.0.yaml
  22. + 9 - 0   api/core/model_runtime/model_providers/cohere/text_embedding/embed-multilingual-v3.0.yaml
  23. + 234 - 0 api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py
  24. + 3 - 0   api/core/spiltter/fixed_text_splitter.py
  25. + 1 - 1   api/requirements.txt
  26. + 272 - 0 api/tests/integration_tests/model_runtime/cohere/test_llm.py
  27. + 64 - 0  api/tests/integration_tests/model_runtime/cohere/test_text_embedding.py

+ 5 - 0
api/core/model_runtime/model_providers/__base/large_language_model.py

@@ -1,5 +1,6 @@
 import logging
 import os
+import re
 import time
 from abc import abstractmethod
 from typing import Generator, List, Optional, Union
@@ -212,6 +213,10 @@ class LargeLanguageModel(AIModel):
         """
         raise NotImplementedError
 
+    def enforce_stop_tokens(self, text: str, stop: List[str]) -> str:
+        """Cut off the text as soon as any stop words occur."""
+        return re.split("|".join(stop), text, maxsplit=1)[0]
+
     def _llm_result_to_stream(self, result: LLMResult) -> Generator:
         """
         Transform llm result to stream
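
A standalone sketch of how the new enforce_stop_tokens helper behaves, reproduced outside the diff. Note that the stop strings are joined into a regex alternation, so stop words containing regex metacharacters would need re.escape first:

import re

def enforce_stop_tokens(text: str, stop: list[str]) -> str:
    """Cut off the text as soon as any stop words occur."""
    return re.split("|".join(stop), text, maxsplit=1)[0]

print(enforce_stop_tokens("Hello there. How are you?", ["How"]))
# -> 'Hello there. ' (everything from the first stop word onward is dropped)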

+ 43 - 2
api/core/model_runtime/model_providers/cohere/cohere.yaml

@@ -14,9 +14,12 @@ help:
   url:
     en_US: https://dashboard.cohere.com/api-keys
 supported_model_types:
+  - llm
+  - text-embedding
   - rerank
 configurate_methods:
   - predefined-model
+  - customizable-model
 provider_credential_schema:
   credential_form_schemas:
     - variable: api_key
@@ -26,6 +29,44 @@ provider_credential_schema:
       type: secret-input
       required: true
       placeholder:
-        zh_Hans: 请填写 API Key
-        en_US: Please fill in API Key
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
       show_on: [ ]
+model_credential_schema:
+  model:
+    label:
+      en_US: Model Name
+      zh_Hans: 模型名称
+    placeholder:
+      en_US: Enter your model name
+      zh_Hans: 输入模型名称
+  credential_form_schemas:
+    - variable: mode
+      show_on:
+        - variable: __model_type
+          value: llm
+      label:
+        en_US: Completion mode
+      type: select
+      required: false
+      default: chat
+      placeholder:
+        zh_Hans: 选择对话类型
+        en_US: Select completion mode
+      options:
+        - value: completion
+          label:
+            en_US: Completion
+            zh_Hans: 补全
+        - value: chat
+          label:
+            en_US: Chat
+            zh_Hans: 对话
+    - variable: api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
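
With the new model_credential_schema, a customizable (fine-tuned) Cohere model is configured with two fields. A hypothetical payload matching the schema above, with placeholder values; it mirrors the credentials dicts used by the fine-tuned model tests added below:

# Hypothetical credentials for a fine-tuned Cohere model (values are placeholders)
credentials = {
    'mode': 'chat',    # 'completion' or 'chat'; selects the invoke path in llm.py
    'api_key': 'xxx',  # secret-input field; not a real key
}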

+ 0 - 0
api/core/model_runtime/model_providers/cohere/llm/__init__.py


+ 8 - 0
api/core/model_runtime/model_providers/cohere/llm/_position.yaml

@@ -0,0 +1,8 @@
+- command-chat
+- command-light-chat
+- command-nightly-chat
+- command-light-nightly-chat
+- command
+- command-light
+- command-nightly
+- command-light-nightly

+ 62 - 0
api/core/model_runtime/model_providers/cohere/llm/command-chat.yaml

@@ -0,0 +1,62 @@
+model: command-chat
+label:
+  zh_Hans: command-chat
+  en_US: command-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+  - name: preamble_override
+    label:
+      zh_Hans: 前导文本
+      en_US: Preamble
+    type: string
+    help:
+      zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+      en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+    required: false
+  - name: prompt_truncation
+    label:
+      zh_Hans: 提示截断
+      en_US: Prompt Truncation
+    type: string
+    help:
+      zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+      en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+    required: true
+    default: 'AUTO'
+    options:
+      - 'AUTO'
+      - 'OFF'
+pricing:
+  input: '1.0'
+  output: '2.0'
+  unit: '0.000001'
+  currency: USD
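
In the pricing block, unit scales the listed input/output prices down to a single token, so these values read as 1.0 USD per million input tokens and 2.0 USD per million output tokens. A quick arithmetic check, assuming the runtime computes cost as tokens * unit * price:

input_cost = 1_000_000 * 0.000001 * 1.0   # -> 1.0 USD per 1M input tokens
output_cost = 1_000_000 * 0.000001 * 2.0  # -> 2.0 USD per 1M output tokens
print(input_cost, output_cost)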

+ 62 - 0
api/core/model_runtime/model_providers/cohere/llm/command-light-chat.yaml

@@ -0,0 +1,62 @@
+model: command-light-chat
+label:
+  zh_Hans: command-light-chat
+  en_US: command-light-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+  - name: preamble_override
+    label:
+      zh_Hans: 前导文本
+      en_US: Preamble
+    type: string
+    help:
+      zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+      en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+    required: false
+  - name: prompt_truncation
+    label:
+      zh_Hans: 提示截断
+      en_US: Prompt Truncation
+    type: string
+    help:
+      zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+      en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+    required: true
+    default: 'AUTO'
+    options:
+      - 'AUTO'
+      - 'OFF'
+pricing:
+  input: '0.3'
+  output: '0.6'
+  unit: '0.000001'
+  currency: USD

+ 62 - 0
api/core/model_runtime/model_providers/cohere/llm/command-light-nightly-chat.yaml

@@ -0,0 +1,62 @@
+model: command-light-nightly-chat
+label:
+  zh_Hans: command-light-nightly-chat
+  en_US: command-light-nightly-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+  - name: preamble_override
+    label:
+      zh_Hans: 前导文本
+      en_US: Preamble
+    type: string
+    help:
+      zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+      en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+    required: false
+  - name: prompt_truncation
+    label:
+      zh_Hans: 提示截断
+      en_US: Prompt Truncation
+    type: string
+    help:
+      zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+      en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+    required: true
+    default: 'AUTO'
+    options:
+      - 'AUTO'
+      - 'OFF'
+pricing:
+  input: '0.3'
+  output: '0.6'
+  unit: '0.000001'
+  currency: USD

+ 44 - 0
api/core/model_runtime/model_providers/cohere/llm/command-light-nightly.yaml

@@ -0,0 +1,44 @@
+model: command-light-nightly
+label:
+  zh_Hans: command-light-nightly
+  en_US: command-light-nightly
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+pricing:
+  input: '0.3'
+  output: '0.6'
+  unit: '0.000001'
+  currency: USD

+ 44 - 0
api/core/model_runtime/model_providers/cohere/llm/command-light.yaml

@@ -0,0 +1,44 @@
+model: command-light
+label:
+  zh_Hans: command-light
+  en_US: command-light
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+pricing:
+  input: '0.3'
+  output: '0.6'
+  unit: '0.000001'
+  currency: USD

+ 62 - 0
api/core/model_runtime/model_providers/cohere/llm/command-nightly-chat.yaml

@@ -0,0 +1,62 @@
+model: command-nightly-chat
+label:
+  zh_Hans: command-nightly-chat
+  en_US: command-nightly-chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+  - name: preamble_override
+    label:
+      zh_Hans: 前导文本
+      en_US: Preamble
+    type: string
+    help:
+      zh_Hans: 当指定时,将使用提供的前导文本替换默认的 Cohere 前导文本。
+      en_US: When specified, the default Cohere preamble will be replaced with the provided one.
+    required: false
+  - name: prompt_truncation
+    label:
+      zh_Hans: 提示截断
+      en_US: Prompt Truncation
+    type: string
+    help:
+      zh_Hans: 指定如何构造 Prompt。当 prompt_truncation 设置为 "AUTO" 时,将会丢弃一些来自聊天记录的元素,以尝试构造一个符合模型上下文长度限制的 Prompt。
+      en_US: Dictates how the prompt will be constructed. With prompt_truncation set to "AUTO", some elements from chat histories will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+    required: true
+    default: 'AUTO'
+    options:
+      - 'AUTO'
+      - 'OFF'
+pricing:
+  input: '1.0'
+  output: '2.0'
+  unit: '0.000001'
+  currency: USD

+ 44 - 0
api/core/model_runtime/model_providers/cohere/llm/command-nightly.yaml

@@ -0,0 +1,44 @@
+model: command-nightly
+label:
+  zh_Hans: command-nightly
+  en_US: command-nightly
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+pricing:
+  input: '1.0'
+  output: '2.0'
+  unit: '0.000001'
+  currency: USD

+ 44 - 0
api/core/model_runtime/model_providers/cohere/llm/command.yaml

@@ -0,0 +1,44 @@
+model: command
+label:
+  zh_Hans: command
+  en_US: command
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: completion
+  context_size: 4096
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    max: 5.0
+  - name: p
+    use_template: top_p
+    default: 0.75
+    min: 0.01
+    max: 0.99
+  - name: k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presence_penalty
+    use_template: presence_penalty
+  - name: frequency_penalty
+    use_template: frequency_penalty
+  - name: max_tokens
+    use_template: max_tokens
+    default: 256
+    max: 4096
+pricing:
+  input: '1.0'
+  output: '2.0'
+  unit: '0.000001'
+  currency: USD

+ 565 - 0
api/core/model_runtime/model_providers/cohere/llm/llm.py

@@ -0,0 +1,565 @@
+import logging
+from typing import Generator, List, Optional, Union, cast, Tuple
+
+import cohere
+from cohere.responses import Chat, Generations
+from cohere.responses.chat import StreamingChat, StreamTextGeneration, StreamEnd
+from cohere.responses.generation import StreamingText, StreamingGenerations
+
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessage,
+                                                          PromptMessageContentType, SystemPromptMessage,
+                                                          TextPromptMessageContent, UserPromptMessage,
+                                                          PromptMessageTool)
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, I18nObject, ModelType
+from core.model_runtime.errors.invoke import InvokeConnectionError, InvokeServerUnavailableError, InvokeError, \
+    InvokeRateLimitError, InvokeAuthorizationError, InvokeBadRequestError
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+
+logger = logging.getLogger(__name__)
+
+
+class CohereLargeLanguageModel(LargeLanguageModel):
+    """
+    Model class for Cohere large language model.
+    """
+
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[List[str]] = None,
+                stream: bool = True, user: Optional[str] = None) \
+            -> Union[LLMResult, Generator]:
+        """
+        Invoke large language model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param model_parameters: model parameters
+        :param tools: tools for tool calling
+        :param stop: stop words
+        :param stream: is stream response
+        :param user: unique user id
+        :return: full response or stream response chunk generator result
+        """
+        # get model mode
+        model_mode = self.get_model_mode(model, credentials)
+
+        if model_mode == LLMMode.CHAT:
+            return self._chat_generate(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                stop=stop,
+                stream=stream,
+                user=user
+            )
+        else:
+            return self._generate(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                stop=stop,
+                stream=stream,
+                user=user
+            )
+
+    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                       tools: Optional[list[PromptMessageTool]] = None) -> int:
+        """
+        Get number of tokens for given prompt messages
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return:
+        """
+        # get model mode
+        model_mode = self.get_model_mode(model)
+
+        try:
+            if model_mode == LLMMode.CHAT:
+                return self._num_tokens_from_messages(model, credentials, prompt_messages)
+            else:
+                return self._num_tokens_from_string(model, credentials, prompt_messages[0].content)
+        except Exception as e:
+            raise self._transform_invoke_error(e)
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            # get model mode
+            model_mode = self.get_model_mode(model)
+
+            if model_mode == LLMMode.CHAT:
+                self._chat_generate(
+                    model=model,
+                    credentials=credentials,
+                    prompt_messages=[UserPromptMessage(content='ping')],
+                    model_parameters={
+                        'max_tokens': 20,
+                        'temperature': 0,
+                    },
+                    stream=False
+                )
+            else:
+                self._generate(
+                    model=model,
+                    credentials=credentials,
+                    prompt_messages=[UserPromptMessage(content='ping')],
+                    model_parameters={
+                        'max_tokens': 20,
+                        'temperature': 0,
+                    },
+                    stream=False
+                )
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex))
+
+    def _generate(self, model: str, credentials: dict,
+                  prompt_messages: list[PromptMessage], model_parameters: dict, stop: Optional[List[str]] = None,
+                  stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+        """
+        Invoke llm model
+
+        :param model: model name
+        :param credentials: credentials
+        :param prompt_messages: prompt messages
+        :param model_parameters: model parameters
+        :param stop: stop words
+        :param stream: is stream response
+        :param user: unique user id
+        :return: full response or stream response chunk generator result
+        """
+        # initialize client
+        client = cohere.Client(credentials.get('api_key'))
+
+        if stop:
+            model_parameters['end_sequences'] = stop
+
+        response = client.generate(
+            prompt=prompt_messages[0].content,
+            model=model,
+            stream=stream,
+            **model_parameters,
+        )
+
+        if stream:
+            return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
+
+        return self._handle_generate_response(model, credentials, response, prompt_messages)
+
+    def _handle_generate_response(self, model: str, credentials: dict, response: Generations,
+                                  prompt_messages: list[PromptMessage]) \
+            -> LLMResult:
+        """
+        Handle llm response
+
+        :param model: model name
+        :param credentials: credentials
+        :param response: response
+        :param prompt_messages: prompt messages
+        :return: llm response
+        """
+        assistant_text = response.generations[0].text
+
+        # transform assistant message to prompt message
+        assistant_prompt_message = AssistantPromptMessage(
+            content=assistant_text
+        )
+
+        # calculate num tokens
+        prompt_tokens = response.meta['billed_units']['input_tokens']
+        completion_tokens = response.meta['billed_units']['output_tokens']
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+        # transform response
+        response = LLMResult(
+            model=model,
+            prompt_messages=prompt_messages,
+            message=assistant_prompt_message,
+            usage=usage
+        )
+
+        return response
+
+    def _handle_generate_stream_response(self, model: str, credentials: dict, response: StreamingGenerations,
+                                         prompt_messages: list[PromptMessage]) -> Generator:
+        """
+        Handle llm stream response
+
+        :param model: model name
+        :param response: response
+        :param prompt_messages: prompt messages
+        :return: llm response chunk generator
+        """
+        index = 1
+        full_assistant_content = ''
+        for chunk in response:
+            if isinstance(chunk, StreamingText):
+                chunk = cast(StreamingText, chunk)
+                text = chunk.text
+
+                if text is None:
+                    continue
+
+                # transform assistant message to prompt message
+                assistant_prompt_message = AssistantPromptMessage(
+                    content=text
+                )
+
+                full_assistant_content += text
+
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=index,
+                        message=assistant_prompt_message,
+                    )
+                )
+
+                index += 1
+            elif chunk is None:
+                # calculate num tokens
+                prompt_tokens = response.meta['billed_units']['input_tokens']
+                completion_tokens = response.meta['billed_units']['output_tokens']
+
+                # transform usage
+                usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=index,
+                        message=AssistantPromptMessage(content=''),
+                        finish_reason=response.finish_reason,
+                        usage=usage
+                    )
+                )
+                break
+
+    def _chat_generate(self, model: str, credentials: dict,
+                       prompt_messages: list[PromptMessage], model_parameters: dict, stop: Optional[List[str]] = None,
+                       stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+        """
+        Invoke llm chat model
+
+        :param model: model name
+        :param credentials: credentials
+        :param prompt_messages: prompt messages
+        :param model_parameters: model parameters
+        :param stop: stop words
+        :param stream: is stream response
+        :param user: unique user id
+        :return: full response or stream response chunk generator result
+        """
+        # initialize client
+        client = cohere.Client(credentials.get('api_key'))
+
+        if user:
+            model_parameters['user_name'] = user
+
+        message, chat_histories = self._convert_prompt_messages_to_message_and_chat_histories(prompt_messages)
+
+        # chat model
+        real_model = model
+        if self.get_model_schema(model, credentials).fetch_from == FetchFrom.PREDEFINED_MODEL:
+            real_model = model.removesuffix('-chat')
+
+        response = client.chat(
+            message=message,
+            chat_history=chat_histories,
+            model=real_model,
+            stream=stream,
+            return_preamble=True,
+            **model_parameters,
+        )
+
+        if stream:
+            return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, stop)
+
+        return self._handle_chat_generate_response(model, credentials, response, prompt_messages, stop)
+
+    def _handle_chat_generate_response(self, model: str, credentials: dict, response: Chat,
+                                       prompt_messages: list[PromptMessage], stop: Optional[List[str]] = None) \
+            -> LLMResult:
+        """
+        Handle llm chat response
+
+        :param model: model name
+        :param credentials: credentials
+        :param response: response
+        :param prompt_messages: prompt messages
+        :param stop: stop words
+        :return: llm response
+        """
+        assistant_text = response.text
+
+        # transform assistant message to prompt message
+        assistant_prompt_message = AssistantPromptMessage(
+            content=assistant_text
+        )
+
+        # calculate num tokens
+        prompt_tokens = self._num_tokens_from_messages(model, credentials, prompt_messages)
+        completion_tokens = self._num_tokens_from_messages(model, credentials, [assistant_prompt_message])
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+        if stop:
+            # enforce stop tokens
+            assistant_text = self.enforce_stop_tokens(assistant_text, stop)
+            assistant_prompt_message = AssistantPromptMessage(
+                content=assistant_text
+            )
+
+        # transform response
+        response = LLMResult(
+            model=model,
+            prompt_messages=prompt_messages,
+            message=assistant_prompt_message,
+            usage=usage,
+            system_fingerprint=response.preamble
+        )
+
+        return response
+
+    def _handle_chat_generate_stream_response(self, model: str, credentials: dict, response: StreamingChat,
+                                              prompt_messages: list[PromptMessage],
+                                              stop: Optional[List[str]] = None) -> Generator:
+        """
+        Handle llm chat stream response
+
+        :param model: model name
+        :param response: response
+        :param prompt_messages: prompt messages
+        :param stop: stop words
+        :return: llm response chunk generator
+        """
+
+        def final_response(full_text: str, index: int, finish_reason: Optional[str] = None,
+                           preamble: Optional[str] = None) -> LLMResultChunk:
+            # calculate num tokens
+            prompt_tokens = self._num_tokens_from_messages(model, credentials, prompt_messages)
+
+            full_assistant_prompt_message = AssistantPromptMessage(
+                content=full_text
+            )
+            completion_tokens = self._num_tokens_from_messages(model, credentials, [full_assistant_prompt_message])
+
+            # transform usage
+            usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+            return LLMResultChunk(
+                model=model,
+                prompt_messages=prompt_messages,
+                system_fingerprint=preamble,
+                delta=LLMResultChunkDelta(
+                    index=index,
+                    message=AssistantPromptMessage(content=''),
+                    finish_reason=finish_reason,
+                    usage=usage
+                )
+            )
+
+        index = 1
+        full_assistant_content = ''
+        for chunk in response:
+            if isinstance(chunk, StreamTextGeneration):
+                chunk = cast(StreamTextGeneration, chunk)
+                text = chunk.text
+
+                if text is None:
+                    continue
+
+                # transform assistant message to prompt message
+                assistant_prompt_message = AssistantPromptMessage(
+                    content=text
+                )
+
+                # stop
+                # notice: this logic can only cover a few stop scenarios
+                if stop and text in stop:
+                    yield final_response(full_assistant_content, index, 'stop')
+                    break
+
+                full_assistant_content += text
+
+                yield LLMResultChunk(
+                    model=model,
+                    prompt_messages=prompt_messages,
+                    delta=LLMResultChunkDelta(
+                        index=index,
+                        message=assistant_prompt_message,
+                    )
+                )
+
+                index += 1
+            elif isinstance(chunk, StreamEnd):
+                chunk = cast(StreamEnd, chunk)
+                yield final_response(full_assistant_content, index, chunk.finish_reason, response.preamble)
+                index += 1
+
+    def _convert_prompt_messages_to_message_and_chat_histories(self, prompt_messages: list[PromptMessage]) \
+            -> Tuple[str, list[dict]]:
+        """
+        Convert prompt messages to message and chat histories
+        :param prompt_messages: prompt messages
+        :return:
+        """
+        chat_histories = []
+        for prompt_message in prompt_messages:
+            chat_histories.append(self._convert_prompt_message_to_dict(prompt_message))
+
+        # get latest message from chat histories and pop it
+        if len(chat_histories) > 0:
+            latest_message = chat_histories.pop()
+            message = latest_message['message']
+        else:
+            raise ValueError('Prompt messages is empty')
+
+        return message, chat_histories
+
+    def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+        """
+        Convert PromptMessage to dict for Cohere model
+        """
+        if isinstance(message, UserPromptMessage):
+            message = cast(UserPromptMessage, message)
+            if isinstance(message.content, str):
+                message_dict = {"role": "USER", "message": message.content}
+            else:
+                sub_message_text = ''
+                for message_content in message.content:
+                    if message_content.type == PromptMessageContentType.TEXT:
+                        message_content = cast(TextPromptMessageContent, message_content)
+                        sub_message_text += message_content.data
+
+                message_dict = {"role": "USER", "message": sub_message_text}
+        elif isinstance(message, AssistantPromptMessage):
+            message = cast(AssistantPromptMessage, message)
+            message_dict = {"role": "CHATBOT", "message": message.content}
+        elif isinstance(message, SystemPromptMessage):
+            message = cast(SystemPromptMessage, message)
+            message_dict = {"role": "USER", "message": message.content}
+        else:
+            raise ValueError(f"Got unknown type {message}")
+
+        if message.name is not None:
+            message_dict["user_name"] = message.name
+
+        return message_dict
+
+    def _num_tokens_from_string(self, model: str, credentials: dict, text: str) -> int:
+        """
+        Calculate num tokens for text completion model.
+
+        :param model: model name
+        :param credentials: credentials
+        :param text: prompt text
+        :return: number of tokens
+        """
+        # initialize client
+        client = cohere.Client(credentials.get('api_key'))
+
+        response = client.tokenize(
+            text=text,
+            model=model
+        )
+
+        return response.length
+
+    def _num_tokens_from_messages(self, model: str, credentials: dict, messages: List[PromptMessage]) -> int:
+        """Calculate num tokens Cohere model."""
+        messages = [self._convert_prompt_message_to_dict(m) for m in messages]
+        message_strs = [f"{message['role']}: {message['message']}" for message in messages]
+        message_str = "\n".join(message_strs)
+
+        real_model = model
+        if self.get_model_schema(model, credentials).fetch_from == FetchFrom.PREDEFINED_MODEL:
+            real_model = model.removesuffix('-chat')
+
+        return self._num_tokens_from_string(real_model, credentials, message_str)
+
+    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
+        """
+        Cohere supports fine-tuning of their models. This method returns the schema of the base model
+        but renamed to the fine-tuned model name.
+
+        :param model: model name
+        :param credentials: credentials
+
+        :return: model schema
+        """
+        # get model schema
+        models = self.predefined_models()
+        model_map = {model.model: model for model in models}
+
+        mode = credentials.get('mode')
+
+        if mode == 'chat':
+            base_model_schema = model_map['command-light-chat']
+        else:
+            base_model_schema = model_map['command-light']
+
+        base_model_schema = cast(AIModelEntity, base_model_schema)
+
+        base_model_schema_features = base_model_schema.features or []
+        base_model_schema_model_properties = base_model_schema.model_properties or {}
+        base_model_schema_parameters_rules = base_model_schema.parameter_rules or []
+
+        entity = AIModelEntity(
+            model=model,
+            label=I18nObject(
+                zh_Hans=model,
+                en_US=model
+            ),
+            model_type=ModelType.LLM,
+            features=[feature for feature in base_model_schema_features],
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_properties={
+                key: property for key, property in base_model_schema_model_properties.items()
+            },
+            parameter_rules=[rule for rule in base_model_schema_parameters_rules],
+            pricing=base_model_schema.pricing
+        )
+
+        return entity
+
+    @property
+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+        """
+        Map model invoke error to unified error
+        The key is the error type thrown to the caller
+        The value is the error type thrown by the model,
+        which needs to be converted into a unified error type for the caller.
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [
+                cohere.CohereConnectionError
+            ],
+            InvokeServerUnavailableError: [],
+            InvokeRateLimitError: [],
+            InvokeAuthorizationError: [],
+            InvokeBadRequestError: [
+                cohere.CohereAPIError,
+                cohere.CohereError,
+            ]
+        }
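
A minimal usage sketch for the new model class, assuming a valid COHERE_API_KEY in the environment; it mirrors the integration tests added at the bottom of this commit:

import os

from core.model_runtime.entities.message_entities import UserPromptMessage
from core.model_runtime.model_providers.cohere.llm.llm import CohereLargeLanguageModel

model = CohereLargeLanguageModel()
result = model.invoke(
    model='command-light-chat',
    credentials={'api_key': os.environ.get('COHERE_API_KEY')},
    prompt_messages=[UserPromptMessage(content='Hello World!')],
    model_parameters={'temperature': 0.0, 'max_tokens': 20},
    stream=False,
    user='abc-123',
)
print(result.message.content)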

+ 0 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/__init__.py


+ 7 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/_position.yaml

@@ -0,0 +1,7 @@
+- embed-multilingual-v3.0
+- embed-multilingual-light-v3.0
+- embed-english-v3.0
+- embed-english-light-v3.0
+- embed-multilingual-v2.0
+- embed-english-v2.0
+- embed-english-light-v2.0

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-light-v2.0.yaml

@@ -0,0 +1,9 @@
+model: embed-english-light-v2.0
+model_type: text-embedding
+model_properties:
+  context_size: 1024
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD
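
context_size and max_chunks drive the batching logic in text_embedding.py (added below): each text is tokenized, split into context_size-token chunks, and the chunks are embedded at most max_chunks per API call. A rough sketch of the chunk count for one long text, using the properties above:

context_size, max_chunks = 1024, 48
num_tokens = 2500
chunks = -(-num_tokens // context_size)  # ceiling division -> 3 chunks
print(chunks <= max_chunks)              # True: fits in a single embed call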

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-light-v3.0.yaml

@@ -0,0 +1,9 @@
+model: embed-english-light-v3.0
+model_type: text-embedding
+model_properties:
+  context_size: 384
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-v2.0.yaml

@@ -0,0 +1,9 @@
+model: embed-english-v2.0
+model_type: text-embedding
+model_properties:
+  context_size: 4096
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-english-v3.0.yaml

@@ -0,0 +1,9 @@
+model: embed-english-v3.0
+model_type: text-embedding
+model_properties:
+  context_size: 1024
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-multilingual-light-v3.0.yaml

@@ -0,0 +1,9 @@
+model: embed-multilingual-light-v3.0
+model_type: text-embedding
+model_properties:
+  context_size: 384
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-multilingual-v2.0.yaml

@@ -0,0 +1,9 @@
+model: embed-multilingual-v2.0
+model_type: text-embedding
+model_properties:
+  context_size: 768
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD

+ 9 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/embed-multilingual-v3.0.yaml

@@ -0,0 +1,9 @@
+model: embed-multilingual-v3.0
+model_type: text-embedding
+model_properties:
+  context_size: 1024
+  max_chunks: 48
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: USD

+ 234 - 0
api/core/model_runtime/model_providers/cohere/text_embedding/text_embedding.py

@@ -0,0 +1,234 @@
+import time
+from typing import Optional, Tuple
+
+import cohere
+import numpy as np
+from cohere.responses import Tokens
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.invoke import InvokeConnectionError, InvokeServerUnavailableError, InvokeRateLimitError, \
+    InvokeAuthorizationError, InvokeBadRequestError, InvokeError
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+
+
+class CohereTextEmbeddingModel(TextEmbeddingModel):
+    """
+    Model class for Cohere text embedding model.
+    """
+
+    def _invoke(self, model: str, credentials: dict,
+                texts: list[str], user: Optional[str] = None) \
+            -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :return: embeddings result
+        """
+        # get model properties
+        context_size = self._get_context_size(model, credentials)
+        max_chunks = self._get_max_chunks(model, credentials)
+
+        embeddings: list[list[float]] = [[] for _ in range(len(texts))]
+        tokens = []
+        indices = []
+        used_tokens = 0
+
+        for i, text in enumerate(texts):
+            tokenize_response = self._tokenize(
+                model=model,
+                credentials=credentials,
+                text=text
+            )
+
+            for j in range(0, tokenize_response.length, context_size):
+                tokens += [tokenize_response.token_strings[j: j + context_size]]
+                indices += [i]
+
+        batched_embeddings = []
+        _iter = range(0, len(tokens), max_chunks)
+
+        for i in _iter:
+            # call embedding model
+            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                model=model,
+                credentials=credentials,
+                texts=["".join(token) for token in tokens[i: i + max_chunks]]
+            )
+
+            used_tokens += embedding_used_tokens
+            batched_embeddings += embeddings_batch
+
+        results: list[list[list[float]]] = [[] for _ in range(len(texts))]
+        num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
+        for i in range(len(indices)):
+            results[indices[i]].append(batched_embeddings[i])
+            num_tokens_in_batch[indices[i]].append(len(tokens[i]))
+
+        for i in range(len(texts)):
+            _result = results[i]
+            if len(_result) == 0:
+                embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                    model=model,
+                    credentials=credentials,
+                    texts=[""]
+                )
+
+                used_tokens += embedding_used_tokens
+                average = embeddings_batch[0]
+            else:
+                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+
+        # calc usage
+        usage = self._calc_response_usage(
+            model=model,
+            credentials=credentials,
+            tokens=used_tokens
+        )
+
+        return TextEmbeddingResult(
+            embeddings=embeddings,
+            usage=usage,
+            model=model
+        )
+
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        """
+        Get number of tokens for given prompt messages
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return:
+        """
+        if len(texts) == 0:
+            return 0
+
+        full_text = ' '.join(texts)
+
+        try:
+            response = self._tokenize(
+                model=model,
+                credentials=credentials,
+                text=full_text
+            )
+        except Exception as e:
+            raise self._transform_invoke_error(e)
+
+        return response.length
+
+    def _tokenize(self, model: str, credentials: dict, text: str) -> Tokens:
+        """
+        Tokenize text
+        :param model: model name
+        :param credentials: model credentials
+        :param text: text to tokenize
+        :return:
+        """
+        # initialize client
+        client = cohere.Client(credentials.get('api_key'))
+
+        response = client.tokenize(
+            text=text,
+            model=model
+        )
+
+        return response
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            # call embedding model
+            self._embedding_invoke(
+                model=model,
+                credentials=credentials,
+                texts=['ping']
+            )
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex))
+
+    def _embedding_invoke(self, model: str, credentials: dict, texts: list[str]) -> Tuple[list[list[float]], int]:
+        """
+        Invoke embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return: embeddings and used tokens
+        """
+        # initialize client
+        client = cohere.Client(credentials.get('api_key'))
+
+        # call embedding model
+        response = client.embed(
+            texts=texts,
+            model=model,
+            input_type='search_document' if len(texts) > 1 else 'search_query'
+        )
+
+        return response.embeddings, response.meta['billed_units']['input_tokens']
+
+    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+        """
+        Calculate response usage
+
+        :param model: model name
+        :param credentials: model credentials
+        :param tokens: input tokens
+        :return: usage
+        """
+        # get input price info
+        input_price_info = self.get_price(
+            model=model,
+            credentials=credentials,
+            price_type=PriceType.INPUT,
+            tokens=tokens
+        )
+
+        # transform usage
+        usage = EmbeddingUsage(
+            tokens=tokens,
+            total_tokens=tokens,
+            unit_price=input_price_info.unit_price,
+            price_unit=input_price_info.unit,
+            total_price=input_price_info.total_amount,
+            currency=input_price_info.currency,
+            latency=time.perf_counter() - self.started_at
+        )
+
+        return usage
+
+    @property
+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+        """
+        Map model invoke error to unified error
+        The key is the error type thrown to the caller
+        The value is the error type thrown by the model,
+        which needs to be converted into a unified error type for the caller.
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [
+                cohere.CohereConnectionError
+            ],
+            InvokeServerUnavailableError: [],
+            InvokeRateLimitError: [],
+            InvokeAuthorizationError: [],
+            InvokeBadRequestError: [
+                cohere.CohereAPIError,
+                cohere.CohereError,
+            ]
+        }
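
And a minimal usage sketch for the embedding class, again assuming a valid COHERE_API_KEY; it mirrors the embedding integration tests added below:

import os

from core.model_runtime.model_providers.cohere.text_embedding.text_embedding import CohereTextEmbeddingModel

model = CohereTextEmbeddingModel()
result = model.invoke(
    model='embed-multilingual-v3.0',
    credentials={'api_key': os.environ.get('COHERE_API_KEY')},
    texts=['hello', 'world'],
    user='abc-123',
)
print(len(result.embeddings), result.usage.total_tokens)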

+ 3 - 0
api/core/spiltter/fixed_text_splitter.py

@@ -24,6 +24,9 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):
             **kwargs: Any,
     ):
         def _token_encoder(text: str) -> int:
+            if not text:
+                return 0
+
             if embedding_model_instance:
                 embedding_model_type_instance = embedding_model_instance.model_type_instance
                 embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
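
The new guard short-circuits empty strings before they reach the embedding model's tokenizer; without it, every empty probe from the splitter would trigger a remote tokenize call, which Cohere appears to reject for zero-length text (an assumption inferred from this workaround, not stated in the diff). In sketch form, with a hypothetical stand-in for the remote call:

def remote_tokenize(text: str) -> int:
    # hypothetical stand-in for the embedding model's get_num_tokens call
    raise RuntimeError("would hit the Cohere tokenize endpoint")

def _token_encoder(text: str) -> int:
    if not text:
        return 0                  # new guard: empty text never reaches the API
    return remote_tokenize(text)

print(_token_encoder(""))         # 0, no remote call made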

+ 1 - 1
api/requirements.txt

@@ -54,7 +54,7 @@ zhipuai==1.0.7
 werkzeug==2.3.8
 pymilvus==2.3.0
 qdrant-client==1.6.4
-cohere~=4.32
+cohere~=4.44
 pyyaml~=6.0.1
 numpy~=1.25.2
 unstructured[docx,pptx,msg,md,ppt]~=0.10.27

+ 272 - 0
api/tests/integration_tests/model_runtime/cohere/test_llm.py

@@ -0,0 +1,272 @@
+import os
+from typing import Generator
+
+import pytest
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (AssistantPromptMessage, SystemPromptMessage,
+                                                          UserPromptMessage)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.cohere.llm.llm import CohereLargeLanguageModel
+
+
+def test_validate_credentials_for_chat_model():
+    model = CohereLargeLanguageModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model='command-light-chat',
+            credentials={
+                'api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='command-light-chat',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        }
+    )
+
+
+def test_validate_credentials_for_completion_model():
+    model = CohereLargeLanguageModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model='command-light',
+            credentials={
+                'api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='command-light',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        }
+    )
+
+
+def test_invoke_completion_model():
+    model = CohereLargeLanguageModel()
+
+    credentials = {
+        'api_key': os.environ.get('COHERE_API_KEY')
+    }
+
+    result = model.invoke(
+        model='command-light',
+        credentials=credentials,
+        prompt_messages=[
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 1
+        },
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+    assert model._num_tokens_from_string('command-light', credentials, result.message.content) == 1
+
+
+def test_invoke_stream_completion_model():
+    model = CohereLargeLanguageModel()
+
+    result = model.invoke(
+        model='command-light',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        prompt_messages=[
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        stream=True,
+        user="abc-123"
+    )
+
+    assert isinstance(result, Generator)
+
+    for chunk in result:
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+
+
+def test_invoke_chat_model():
+    model = CohereLargeLanguageModel()
+
+    result = model.invoke(
+        model='command-light-chat',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'p': 0.99,
+            'presence_penalty': 0.0,
+            'frequency_penalty': 0.0,
+            'max_tokens': 10
+        },
+        stop=['How'],
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+
+    for chunk in model._llm_result_to_stream(result):
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+
+
+def test_invoke_stream_chat_model():
+    model = CohereLargeLanguageModel()
+
+    result = model.invoke(
+        model='command-light-chat',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        stream=True,
+        user="abc-123"
+    )
+
+    assert isinstance(result, Generator)
+
+    for chunk in result:
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+        if chunk.delta.finish_reason is not None:
+            assert chunk.delta.usage is not None
+            assert chunk.delta.usage.completion_tokens > 0
+
+
+def test_get_num_tokens():
+    model = CohereLargeLanguageModel()
+
+    num_tokens = model.get_num_tokens(
+        model='command-light',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        prompt_messages=[
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ]
+    )
+
+    assert num_tokens == 3
+
+    num_tokens = model.get_num_tokens(
+        model='command-light-chat',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ]
+    )
+
+    assert num_tokens == 15
+
+
+def test_fine_tuned_model():
+    model = CohereLargeLanguageModel()
+
+    # test invoke
+    result = model.invoke(
+        model='85ec47be-6139-4f75-a4be-0f0ec1ef115c-ft',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY'),
+            'mode': 'completion'
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+
+
+def test_fine_tuned_chat_model():
+    model = CohereLargeLanguageModel()
+
+    # test invoke
+    result = model.invoke(
+        model='94f2d55a-4c79-4c00-bde4-23962e74b170-ft',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY'),
+            'mode': 'chat'
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)

+ 64 - 0
api/tests/integration_tests/model_runtime/cohere/test_text_embedding.py

@@ -0,0 +1,64 @@
+import os
+
+import pytest
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.cohere.text_embedding.text_embedding import CohereTextEmbeddingModel
+
+
+def test_validate_credentials():
+    model = CohereTextEmbeddingModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model='embed-multilingual-v3.0',
+            credentials={
+                'api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='embed-multilingual-v3.0',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        }
+    )
+
+
+def test_invoke_model():
+    model = CohereTextEmbeddingModel()
+
+    result = model.invoke(
+        model='embed-multilingual-v3.0',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        texts=[
+            "hello",
+            "world",
+            " ".join(["long_text"] * 100),
+            " ".join(["another_long_text"] * 100)
+        ],
+        user="abc-123"
+    )
+
+    assert isinstance(result, TextEmbeddingResult)
+    assert len(result.embeddings) == 4
+    assert result.usage.total_tokens == 811
+
+
+def test_get_num_tokens():
+    model = CohereTextEmbeddingModel()
+
+    num_tokens = model.get_num_tokens(
+        model='embed-multilingual-v3.0',
+        credentials={
+            'api_key': os.environ.get('COHERE_API_KEY')
+        },
+        texts=[
+            "hello",
+            "world"
+        ]
+    )
+
+    assert num_tokens == 3