
Support new Claude-3.5 Sonnet max token limit (#6335)

Richards Tu 9 months ago
parent
commit
4782fb50c4

+ 2 - 2
api/core/model_runtime/model_providers/anthropic/llm/claude-3-5-sonnet-20240620.yaml

@@ -27,9 +27,9 @@ parameter_rules:
   - name: max_tokens
     use_template: max_tokens
     required: true
-    default: 4096
+    default: 8192
     min: 1
-    max: 4096
+    max: 8192
   - name: response_format
     use_template: response_format
 pricing:
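
This raises both the default and the ceiling for max_tokens from 4096 to 8192 for claude-3-5-sonnet-20240620. Anthropic only honors the 8192-token output limit for this model when the anthropic-beta: max-tokens-3-5-sonnet-2024-07-15 header is sent, which is what the llm.py change below adds. As a minimal sketch (not part of this commit), the same request made directly against the Anthropic Messages API with the official anthropic Python SDK would look like this, assuming ANTHROPIC_API_KEY is set in the environment:

# Sketch: verify the raised output-token limit directly against the API.
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment

response = client.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=8192,  # only accepted when the beta header below is sent
    messages=[{"role": "user", "content": "Write a long story."}],
    # Beta header that unlocks 8192 output tokens for this model;
    # without it the API caps max_tokens at 4096.
    extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"},
)
print(response.usage)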

+ 8 - 1
api/core/model_runtime/model_providers/anthropic/llm/llm.py

@@ -113,6 +113,11 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
         if system:
             extra_model_kwargs['system'] = system
 
+        # Add the new header for claude-3-5-sonnet-20240620 model
+        headers = {}
+        if model == "claude-3-5-sonnet-20240620":
+            headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
+
         if tools:
             extra_model_kwargs['tools'] = [
                 self._transform_tool_prompt(tool) for tool in tools
@@ -121,6 +126,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
                 model=model,
                 messages=prompt_message_dicts,
                 stream=stream,
+                headers=headers,
                 **model_parameters,
                 **extra_model_kwargs
             )
@@ -130,6 +136,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
                 model=model,
                 messages=prompt_message_dicts,
                 stream=stream,
+                headers=headers,
                 **model_parameters,
                 **extra_model_kwargs
             )
@@ -138,7 +145,7 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
             return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages)
 
         return self._handle_chat_generate_response(model, credentials, response, prompt_messages)
-
+    
     def _code_block_mode_wrapper(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                                  model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None,
                                  stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None,
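
For reference, the conditional header this commit introduces could be factored into a small helper if other models ever need their own beta headers. This is only a sketch; the helper name _beta_headers_for is illustrative and not part of Dify's codebase:

# Sketch: same conditional-header pattern as in the diff above, as a helper.
def _beta_headers_for(model: str) -> dict[str, str]:
    """Return extra Anthropic beta headers required by a given model."""
    headers: dict[str, str] = {}
    if model == "claude-3-5-sonnet-20240620":
        # Unlocks the 8192 output-token limit for this model.
        headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15"
    return headers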