Explore the Code

feat: Add response format support for openai compat models (#12240)

Co-authored-by: Gio Gutierrez <giovannygutierrez@gmail.com>
Giovanny Gutiérrez, 3 months ago
parent
commit
d7c0bc8c23
20 files changed, with 242 additions and 0 deletions
  1. 12 0
      api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml
  2. 12 0
      api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml
  3. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml
  4. 13 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml
  5. 13 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml
  6. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml
  7. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml
  8. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml
  9. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml
  10. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml
  11. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml
  12. 13 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml
  13. 13 0
      api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml
  14. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml
  15. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml
  16. 12 0
      api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml
  17. 13 0
      api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml
  18. 13 0
      api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml
  19. 3 0
      api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml
  20. 17 0
      api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/gemma-7b-it.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/gemma2-9b-it.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.1-405b-reasoning.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 13 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.1-70b-versatile.yaml

@@ -6,6 +6,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -19,6 +20,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 13 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.1-8b-instant.yaml

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-text-preview.yaml

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-11b-vision-preview.yaml

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-1b-preview.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-3b-preview.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-text-preview.yaml

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.2-90b-vision-preview.yaml

@@ -19,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.1'

+ 13 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-specdec.yaml

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"

+ 13 - 0
api/core/model_runtime/model_providers/groq/llm/llama-3.3-70b-versatile.yaml

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 131072
@@ -18,6 +19,18 @@ parameter_rules:
     default: 1024
     min: 1
     max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: "0.05"
   output: "0.1"

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama-guard-3-8b.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.20'
   output: '0.20'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama2-70b-4096.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 4096
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.7'
   output: '0.8'

+ 12 - 0
api/core/model_runtime/model_providers/groq/llm/llama3-70b-8192.yaml

@@ -18,6 +18,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.59'
   output: '0.79'

+ 13 - 0
api/core/model_runtime/model_providers/groq/llm/llama3-8b-8192.yaml

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'

+ 13 - 0
api/core/model_runtime/model_providers/groq/llm/llama3-groq-70b-8192-tool-use-preview.yaml

@@ -5,6 +5,7 @@ label:
 model_type: llm
 features:
   - agent-thought
+  - multi-tool-call
 model_properties:
   mode: chat
   context_size: 8192
@@ -18,6 +19,18 @@ parameter_rules:
     default: 512
     min: 1
     max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
 pricing:
   input: '0.05'
   output: '0.08'

+ 3 - 0
api/core/model_runtime/model_providers/openai/llm/gpt-4o.yaml

@@ -37,6 +37,9 @@ parameter_rules:
     options:
       - text
       - json_object
+      - json_schema
+  - name: json_schema
+    use_template: json_schema
 pricing:
   input: '2.50'
   output: '10.00'

+ 17 - 0
api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py

@@ -332,6 +332,23 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
         if not endpoint_url.endswith("/"):
             endpoint_url += "/"
 
+        response_format = model_parameters.get("response_format")
+        if response_format:
+            if response_format == "json_schema":
+                json_schema = model_parameters.get("json_schema")
+                if not json_schema:
+                    raise ValueError("Must define JSON Schema when the response format is json_schema")
+                try:
+                    schema = json.loads(json_schema)
+                except:
+                    raise ValueError(f"not correct json_schema format: {json_schema}")
+                model_parameters.pop("json_schema")
+                model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
+            else:
+                model_parameters["response_format"] = {"type": response_format}
+        elif "json_schema" in model_parameters:
+            del model_parameters["json_schema"]
+
         data = {"model": model, "stream": stream, **model_parameters}
 
         completion_type = LLMMode.value_of(credentials["mode"])