Pārlūkot izejas kodu

add-some-new-models-hosted-on-nvidia (#4303)

Joshua 11 mēneši atpakaļ
vecāks
revīzija
a80fe20456

+ 4 - 0
api/core/model_runtime/model_providers/nvidia/llm/_position.yaml

@@ -1,7 +1,11 @@
 - google/gemma-7b
 - google/codegemma-7b
+- google/recurrentgemma-2b
 - meta/llama2-70b
 - meta/llama3-8b-instruct
 - meta/llama3-70b-instruct
+- mistralai/mistral-large
 - mistralai/mixtral-8x7b-instruct-v0.1
+- mistralai/mixtral-8x22b-instruct-v0.1
 - fuyu-8b
+- snowflake/arctic

+ 36 - 0
api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml

@@ -0,0 +1,36 @@
+model: snowflake/arctic
+label:
+  zh_Hans: snowflake/arctic
+  en_US: snowflake/arctic
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0

+ 5 - 1
api/core/model_runtime/model_providers/nvidia/llm/llm.py

@@ -22,12 +22,16 @@ from core.model_runtime.utils import helper
 class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
     MODEL_SUFFIX_MAP = {
         'fuyu-8b': 'vlm/adept/fuyu-8b',
+        'mistralai/mistral-large': '',
         'mistralai/mixtral-8x7b-instruct-v0.1': '',
+        'mistralai/mixtral-8x22b-instruct-v0.1': '',
         'google/gemma-7b': '',
         'google/codegemma-7b': '',
+        'snowflake/arctic': '',
         'meta/llama2-70b': '',
         'meta/llama3-8b-instruct': '',
-        'meta/llama3-70b-instruct': ''
+        'meta/llama3-70b-instruct': '',
+        'google/recurrentgemma-2b': ''
         
     }
 

+ 36 - 0
api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml

@@ -0,0 +1,36 @@
+model: mistralai/mistral-large
+label:
+  zh_Hans: mistralai/mistral-large
+  en_US: mistralai/mistral-large
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0

+ 36 - 0
api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml

@@ -0,0 +1,36 @@
+model: mistralai/mixtral-8x22b-instruct-v0.1
+label:
+  zh_Hans: mistralai/mixtral-8x22b-instruct-v0.1
+  en_US: mistralai/mixtral-8x22b-instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0

+ 37 - 0
api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml

@@ -0,0 +1,37 @@
+model: google/recurrentgemma-2b
+label:
+  zh_Hans: google/recurrentgemma-2b
+  en_US: google/recurrentgemma-2b
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 2048
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.2
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 0.7
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: random_seed
+    type: int
+    help:
+      en_US: The seed to use for random sampling. If set, different calls will generate deterministic results.
+      zh_Hans: 当开启随机数种子以后,你可以通过指定一个固定的种子来使得回答结果更加稳定
+    label:
+      en_US: Seed
+      zh_Hans: 种子
+    default: 0
+    min: 0
+    max: 2147483647