
[Fix] Sagemaker LLM Provider can't adjust context size, it's always 2… (#13462)

Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
ybalbert001, 2 months ago
parent commit c8357da13b

+ 2 - 2
api/core/model_runtime/model_providers/sagemaker/llm/llm.py

@@ -430,7 +430,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
                 type=ParameterType.INT,
                 use_template="max_tokens",
                 min=1,
-                max=credentials.get("context_length", 2048),
+                max=int(credentials.get("context_length", 2048)),
                 default=512,
                 label=I18nObject(zh_Hans="最大生成长度", en_US="Max Tokens"),
             ),
@@ -448,7 +448,7 @@ class SageMakerLargeLanguageModel(LargeLanguageModel):
         if support_vision:
             features.append(ModelFeature.VISION)
 
-        context_length = credentials.get("context_length", 2048)
+        context_length = int(credentials.get("context_length", 2048))
 
         entity = AIModelEntity(
             model=model,

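Why the cast matters: credential values collected through the provider form arrive as strings (the context_length field added below is a text-input), so without int() the max bound would be a string such as "4096" and numeric comparisons against it would fail. A minimal sketch of the failure mode, assuming credentials is a plain dict of form values (the literal dict here is illustrative, not taken from the codebase):

```python
# Minimal sketch of the bug the int() cast fixes. Assumes `credentials`
# is a plain dict of values collected from the provider form; text-input
# fields are delivered as strings, so context_length is "4096", not 4096.
credentials = {"context_length": "4096"}

# Before the fix: max is the string "4096"; any numeric comparison
# against it raises a TypeError.
max_raw = credentials.get("context_length", 2048)
try:
    print(512 <= max_raw)
except TypeError as exc:
    print(f"TypeError: {exc}")  # '<=' not supported between 'int' and 'str'

# After the fix: max is a proper int, consistent with ParameterType.INT.
max_fixed = int(credentials.get("context_length", 2048))
print(512 <= max_fixed)  # True
```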
+ 13 - 0
api/core/model_runtime/model_providers/sagemaker/sagemaker.yaml

@@ -59,6 +59,19 @@ model_credential_schema:
       placeholder:
         zh_Hans: 请输出你的Sagemaker推理端点
         en_US: Enter your Sagemaker Inference endpoint
+    - variable: context_length
+      show_on:
+        - variable: __model_type
+          value: llm
+      label:
+        zh_Hans: 模型上下文长度
+        en_US: Model context size
+      type: text-input
+      default: '4096'
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的模型上下文长度
+        en_US: Enter your Model context size
     - variable: audio_s3_cache_bucket
       show_on:
         - variable: __model_type
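On the YAML side, note that the default is the quoted scalar '4096' and the field type is text-input, so the value reaches the Python code as a string; that is exactly why llm.py now casts with int(). A quick check of how such a quoted scalar loads, assuming PyYAML (the loader the codebase actually uses for this schema is not shown in this diff):

```python
import yaml  # assumes PyYAML is available

snippet = """
- variable: context_length
  type: text-input
  default: '4096'
"""
fields = yaml.safe_load(snippet)

# The quoted scalar stays a string after loading, so downstream code
# must cast it before using it as a numeric bound.
print(repr(fields[0]["default"]))  # '4096'
print(type(fields[0]["default"]))  # <class 'str'>
```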