View Source

Upgrade oracle models (#13174)

Co-authored-by: engchina <atjapan2015@gmail.com>
Authored by engchina, 2 months ago
Parent commit: 40dd63ecef

+ 52 - 0
api/core/model_runtime/model_providers/oci/llm/cohere.command-r-08-2024.yaml

@@ -0,0 +1,52 @@
+model: cohere.command-r-08-2024
+label:
+  en_US: cohere.command-r-08-2024 v1.7
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    default: 1
+    max: 1.0
+  - name: topP
+    use_template: top_p
+    default: 0.75
+    min: 0
+    max: 1
+  - name: topK
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presencePenalty
+    use_template: presence_penalty
+    min: 0
+    max: 1
+    default: 0
+  - name: frequencyPenalty
+    use_template: frequency_penalty
+    min: 0
+    max: 1
+    default: 0
+  - name: maxTokens
+    use_template: max_tokens
+    default: 600
+    max: 4000
+pricing:
+  input: '0.0009'
+  output: '0.0009'
+  unit: '0.0001'
+  currency: USD

+ 1 - 0
api/core/model_runtime/model_providers/oci/llm/cohere.command-r-16k.yaml

@@ -50,3 +50,4 @@ pricing:
   output: '0.004'
   unit: '0.0001'
   currency: USD
+deprecated: true

+ 52 - 0
api/core/model_runtime/model_providers/oci/llm/cohere.command-r-plus-08-2024.yaml

@@ -0,0 +1,52 @@
+model: cohere.command-r-plus-08-2024
+label:
+  en_US: cohere.command-r-plus-08-2024 v1.6
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    default: 1
+    max: 1.0
+  - name: topP
+    use_template: top_p
+    default: 0.75
+    min: 0
+    max: 1
+  - name: topK
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presencePenalty
+    use_template: presence_penalty
+    min: 0
+    max: 1
+    default: 0
+  - name: frequencyPenalty
+    use_template: frequency_penalty
+    min: 0
+    max: 1
+    default: 0
+  - name: maxTokens
+    use_template: max_tokens
+    default: 600
+    max: 4000
+pricing:
+  input: '0.0156'
+  output: '0.0156'
+  unit: '0.0001'
+  currency: USD

+ 1 - 0
api/core/model_runtime/model_providers/oci/llm/cohere.command-r-plus.yaml

@@ -50,3 +50,4 @@ pricing:
   output: '0.0219'
   unit: '0.0001'
   currency: USD
+deprecated: true

+ 4 - 4
api/core/model_runtime/model_providers/oci/llm/llm.py

@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
 
 request_template = {
     "compartmentId": "",
-    "servingMode": {"modelId": "cohere.command-r-plus", "servingType": "ON_DEMAND"},
+    "servingMode": {"modelId": "cohere.command-r-plus-08-2024", "servingType": "ON_DEMAND"},
     "chatRequest": {
         "apiFormat": "COHERE",
         # "preambleOverride": "You are a helpful assistant.",
@@ -60,19 +60,19 @@ oci_config_template = {
 class OCILargeLanguageModel(LargeLanguageModel):
     # https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm
     _supported_models = {
-        "meta.llama-3-70b-instruct": {
+        "meta.llama-3.1-70b-instruct": {
             "system": True,
             "multimodal": False,
             "tool_call": False,
             "stream_tool_call": False,
         },
-        "cohere.command-r-16k": {
+        "cohere.command-r-08-2024": {
             "system": True,
             "multimodal": False,
             "tool_call": True,
             "stream_tool_call": False,
         },
-        "cohere.command-r-plus": {
+        "cohere.command-r-plus-08-2024": {
             "system": True,
             "multimodal": False,
             "tool_call": True,

+ 1 - 0
api/core/model_runtime/model_providers/oci/llm/meta.llama-3-70b-instruct.yaml

@@ -49,3 +49,4 @@ pricing:
   output: '0.015'
   unit: '0.0001'
   currency: USD
+deprecated: true

+ 51 - 0
api/core/model_runtime/model_providers/oci/llm/meta.llama-3.1-70b-instruct.yaml

@@ -0,0 +1,51 @@
+model: meta.llama-3.1-70b-instruct
+label:
+  zh_Hans: meta.llama-3.1-70b-instruct
+  en_US: meta.llama-3.1-70b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    default: 1
+    max: 2.0
+  - name: topP
+    use_template: top_p
+    default: 0.75
+    min: 0
+    max: 1
+  - name: topK
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+    default: 0
+    min: 0
+    max: 500
+  - name: presencePenalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: frequencyPenalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: maxTokens
+    use_template: max_tokens
+    default: 600
+    max: 4000
+pricing:
+  input: '0.0075'
+  output: '0.0075'
+  unit: '0.0001'
+  currency: USD

+ 2 - 2
api/core/model_runtime/model_providers/oci/oci.py

@@ -19,8 +19,8 @@ class OCIGENAIProvider(ModelProvider):
         try:
             model_instance = self.get_model_instance(ModelType.LLM)
 
-            # Use `cohere.command-r-plus` model for validate,
-            model_instance.validate_credentials(model="cohere.command-r-plus", credentials=credentials)
+            # Use `cohere.command-r-plus-08-2024` model for validate,
+            model_instance.validate_credentials(model="cohere.command-r-plus-08-2024", credentials=credentials)
         except CredentialsValidateFailedError as ex:
             raise ex
         except Exception as ex: