
feat: nvidia add llama3.1 model (#6844)

小羽 committed 8 months ago
Commit 56b43f62d1

+ 3 - 0
api/core/model_runtime/model_providers/nvidia/llm/_position.yaml

@@ -2,6 +2,9 @@
 - google/codegemma-7b
 - google/recurrentgemma-2b
 - meta/llama2-70b
+- meta/llama-3.1-8b-instruct
+- meta/llama-3.1-70b-instruct
+- meta/llama-3.1-405b-instruct
 - meta/llama3-8b-instruct
 - meta/llama3-70b-instruct
 - mistralai/mistral-large
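
For context: `_position.yaml` only controls the display order of the NVIDIA provider's models; the three new entries slot the Llama 3.1 variants next to the existing Llama 2 and Llama 3 rows. A minimal sketch of how such a position file can be consumed (the loader and sorting logic below are assumptions for illustration, not Dify's actual implementation):

```python
import yaml

# Hypothetical loader: reads _position.yaml and orders model names by it.
# Models missing from the file sort last, keeping their relative order
# (sorted() is stable).
def sort_by_position(position_file: str, model_names: list[str]) -> list[str]:
    with open(position_file) as f:
        order = yaml.safe_load(f)  # a plain YAML list of model IDs
    rank = {name: i for i, name in enumerate(order)}
    return sorted(model_names, key=lambda n: rank.get(n, len(rank)))
```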

+ 36 - 0
api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml

@@ -0,0 +1,36 @@
+model: meta/llama-3.1-405b-instruct
+label:
+  zh_Hans: meta/llama-3.1-405b-instruct
+  en_US: meta/llama-3.1-405b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
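
Each `parameter_rules` entry above declares one tunable request parameter with a range and a default. A minimal sketch of how such rules could be applied to user input, clamping out-of-range values and filling in defaults (the `apply_rules` helper is an assumption for illustration, not Dify's actual validation code):

```python
# Illustrative only: clamp user parameters to the YAML-declared ranges
# and fall back to the declared defaults for anything the caller omitted.
def apply_rules(rules: list[dict], user_params: dict) -> dict:
    resolved = {}
    for rule in rules:
        name = rule["name"]
        value = user_params.get(name, rule.get("default"))
        if value is not None:
            if "min" in rule:
                value = max(rule["min"], value)
            if "max" in rule:
                value = min(rule["max"], value)
            resolved[name] = value
    return resolved
```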

+ 36 - 0
api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml

@@ -0,0 +1,36 @@
+model: meta/llama-3.1-70b-instruct
+label:
+  zh_Hans: meta/llama-3.1-70b-instruct
+  en_US: meta/llama-3.1-70b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
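
All three variants are declared in `chat` mode against NVIDIA's OpenAI-compatible API, so a quick smoke test can go through any OpenAI-style client. A hedged example, assuming NVIDIA's published `integrate.api.nvidia.com` endpoint and a valid `nvapi-` key (the request parameters mirror the YAML defaults above):

```python
from openai import OpenAI

# Assumes NVIDIA's OpenAI-compatible endpoint and an NVIDIA API key;
# check the provider docs for your own setup.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key="nvapi-...",  # placeholder, not a real key
)
resp = client.chat.completions.create(
    model="meta/llama-3.1-70b-instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.5,  # defaults taken from the YAML above
    top_p=1,
    max_tokens=1024,
)
print(resp.choices[0].message.content)
```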

+ 36 - 0
api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml

@@ -0,0 +1,36 @@
+model: meta/llama-3.1-8b-instruct
+label:
+  zh_Hans: meta/llama-3.1-8b-instruct
+  en_US: meta/llama-3.1-8b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
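
Since the three model files are meant to differ only in the model identifier and label, a small consistency check can catch drift between them, such as a misspelled rule name in one file. A sketch, assuming the files live at the paths shown in this commit:

```python
import yaml

# Load each model config and compare everything except the model-specific
# fields; any mismatch between the three files is reported.
paths = [
    "api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml",
    "api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml",
    "api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml",
]
configs = []
for p in paths:
    with open(p) as f:
        cfg = yaml.safe_load(f)
    cfg.pop("model")
    cfg.pop("label")
    configs.append(cfg)
assert all(c == configs[0] for c in configs), "model configs have drifted"
```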

+ 3 - 0
api/core/model_runtime/model_providers/nvidia/llm/llm.py

@@ -31,6 +31,9 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
         'meta/llama2-70b': '',
         'meta/llama3-8b-instruct': '',
         'meta/llama3-70b-instruct': '',
+        'meta/llama-3.1-8b-instruct': '',
+        'meta/llama-3.1-70b-instruct': '',
+        'meta/llama-3.1-405b-instruct': '',
         'google/recurrentgemma-2b': ''
         
     }
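
The dict extended here maps each supported model ID to a per-model URL suffix; an empty string appears to mean the model is served from the shared default endpoint rather than a dedicated path (a reading of this hunk, not confirmed from the rest of the file). A minimal sketch of how such a suffix map could drive request routing (the base URL, function, and constant names are assumptions):

```python
# Hypothetical routing helper: models with an empty suffix go to the
# shared chat-completions endpoint, others to a dedicated path.
BASE_URL = "https://integrate.api.nvidia.com/v1"  # assumed default

def endpoint_for(model: str, suffix_map: dict[str, str]) -> str:
    suffix = suffix_map.get(model, "")
    if suffix:
        return f"{BASE_URL}/{suffix}"
    return f"{BASE_URL}/chat/completions"
```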