
add new provider Solar (#6884)

JuHyung Son 8 months ago
parent
commit
2e941bb91c
22 changed files with 1315 additions and 2 deletions
  1. api/core/model_runtime/model_providers/_position.yaml (+1 -0)
  2. api/core/model_runtime/model_providers/upstage/__init__.py (+0 -0)
  3. api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg (+1 -0)
  4. api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg (+3 -0)
  5. api/core/model_runtime/model_providers/upstage/_common.py (+57 -0)
  6. api/core/model_runtime/model_providers/upstage/llm/__init__.py (+0 -0)
  7. api/core/model_runtime/model_providers/upstage/llm/_position.yaml (+1 -0)
  8. api/core/model_runtime/model_providers/upstage/llm/llm.py (+575 -0)
  9. api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml (+43 -0)
  10. api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py (+0 -0)
  11. api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml (+9 -0)
  12. api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml (+9 -0)
  13. api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py (+195 -0)
  14. api/core/model_runtime/model_providers/upstage/upstage.py (+32 -0)
  15. api/core/model_runtime/model_providers/upstage/upstage.yaml (+49 -0)
  16. api/docker/entrypoint.sh (+1 -1)
  17. api/pyproject.toml (+1 -0)
  18. api/tests/integration_tests/model_runtime/upstage/__init__.py (+0 -0)
  19. api/tests/integration_tests/model_runtime/upstage/test_llm.py (+245 -0)
  20. api/tests/integration_tests/model_runtime/upstage/test_provider.py (+23 -0)
  21. api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py (+67 -0)
  22. dev/pytest/pytest_model_runtime.sh (+3 -1)

+ 1 - 0
api/core/model_runtime/model_providers/_position.yaml

@@ -6,6 +6,7 @@
 - nvidia
 - nvidia_nim
 - cohere
+- upstage
 - bedrock
 - togetherai
 - openrouter

+ 0 - 0
api/core/model_runtime/model_providers/upstage/__init__.py


+ 1 - 0
api/core/model_runtime/model_providers/upstage/_assets/icon_l_en.svg

File diff suppressed because it is too large

+ 3 - 0
api/core/model_runtime/model_providers/upstage/_assets/icon_s_en.svg

@@ -0,0 +1,3 @@
+<svg width="137" height="163" viewBox="0 0 137 163" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path fill-rule="evenodd" clip-rule="evenodd" d="M104.652 29.325L103.017 35.0547H114.393L116.014 29.325H104.652ZM88.9956 39.352L87.346 45.0817H111.549L113.17 39.352H88.9956ZM72.8984 55.1088L74.5479 49.379H110.326L108.705 55.1088H72.8984ZM30.4937 59.4061L28.8442 65.1358H105.861L107.482 59.4061H30.4937ZM33.3802 75.1628L35.0298 69.4331H104.638L103.017 75.1628H33.3802ZM37.3478 135.325L38.9973 129.595H27.6069L26 135.325H37.3478ZM54.6682 119.568L53.0186 125.298H28.8442L30.4511 119.568H54.6682ZM67.4662 115.271L69.1157 109.541H33.2949L31.688 115.271H67.4662ZM113.17 99.5142L111.521 105.244H34.5322L36.1391 99.5142H113.17ZM106.97 95.2169L108.62 89.4871H38.9832L37.3763 95.2169H106.97ZM102.021 79.4601L100.372 85.1898H35.0724L36.6793 79.4601H102.021Z" fill="#805CFB"/>
+</svg>

+ 57 - 0
api/core/model_runtime/model_providers/upstage/_common.py

@@ -0,0 +1,57 @@
+
+from collections.abc import Mapping
+
+import openai
+from httpx import Timeout
+
+from core.model_runtime.errors.invoke import (
+    InvokeAuthorizationError,
+    InvokeBadRequestError,
+    InvokeConnectionError,
+    InvokeError,
+    InvokeRateLimitError,
+    InvokeServerUnavailableError,
+)
+
+
+class _CommonUpstage:
+    def _to_credential_kwargs(self, credentials: Mapping) -> dict:
+        """
+        Transform credentials to kwargs for model instance
+
+        :param credentials:
+        :return: 
+        """
+        credentials_kwargs = {
+            "api_key": credentials['upstage_api_key'],
+            "base_url": "https://api.upstage.ai/v1/solar",
+            "timeout": Timeout(315.0, read=300.0, write=20.0, connect=10.0),
+            "max_retries": 1
+        }
+
+        return credentials_kwargs
+
+    @property
+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+        """
+        Map model invoke error to unified error
+        The key is the error type thrown to the caller
+        The value is the error type thrown by the model,
+        which needs to be converted into a unified error type for the caller.
+
+        :return: Invoke error mapping
+        """
+        return {
+            InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
+            InvokeServerUnavailableError: [openai.InternalServerError],
+            InvokeRateLimitError: [openai.RateLimitError],
+            InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
+            InvokeBadRequestError: [
+                openai.BadRequestError,
+                openai.NotFoundError,
+                openai.UnprocessableEntityError,
+                openai.APIError,
+            ],
+        }
+        
+
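
For orientation, a minimal sketch (not part of the commit; the key value is a placeholder) of how _to_credential_kwargs is consumed: the mapping it returns is unpacked straight into the OpenAI SDK client, so every Solar request goes to the Upstage-compatible endpoint with the timeout and retry settings above.

    from openai import OpenAI

    from core.model_runtime.model_providers.upstage._common import _CommonUpstage

    # Placeholder credentials for illustration only.
    credentials = {"upstage_api_key": "up-xxxxxxxxxxxxxxxx"}
    # The resulting client targets https://api.upstage.ai/v1/solar with max_retries=1 and the Timeout above.
    client = OpenAI(**_CommonUpstage()._to_credential_kwargs(credentials))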

+ 0 - 0
api/core/model_runtime/model_providers/upstage/llm/__init__.py


+ 1 - 0
api/core/model_runtime/model_providers/upstage/llm/_position.yaml

@@ -0,0 +1 @@
+- solar-1-mini-chat

+ 575 - 0
api/core/model_runtime/model_providers/upstage/llm/llm.py

@@ -0,0 +1,575 @@
+import logging
+from collections.abc import Generator
+from typing import Optional, Union, cast
+
+from openai import OpenAI, Stream
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
+from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
+from openai.types.chat.chat_completion_message import FunctionCall
+from tokenizers import Tokenizer
+
+from core.model_runtime.callbacks.base_callback import Callback
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    ImagePromptMessageContent,
+    PromptMessage,
+    PromptMessageContentType,
+    PromptMessageTool,
+    SystemPromptMessage,
+    TextPromptMessageContent,
+    ToolPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+logger = logging.getLogger(__name__)
+
+UPSTAGE_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
+The structure of the {{block}} object can be found in the instructions; use {"answer": "$your_answer"} as the default structure
+if you are not sure about the structure.
+
+<instructions>
+{{instructions}}
+</instructions>
+"""
+
+class UpstageLargeLanguageModel(_CommonUpstage, LargeLanguageModel):
+    """
+    Model class for Upstage large language model. 
+    """
+
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+        """
+        Invoke large language model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param model_parameters: model parameters
+        :param tools: tools for tool calling
+        :param stop: stop words
+        :param stream: is stream response
+        :param user: unique user id
+        :return: full response or stream response chunk generator result
+        """
+
+        return self._chat_generate(
+            model=model,
+            credentials=credentials,
+            prompt_messages=prompt_messages,
+            model_parameters=model_parameters,
+            tools=tools,
+            stop=stop,
+            stream=stream,
+            user=user
+        )
+
+    def _code_block_mode_wrapper(self,
+                                 model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None, callbacks: Optional[list[Callback]] = None) -> Union[LLMResult, Generator]:
+        """
+        Code block mode wrapper for invoking large language model
+        """
+        if 'response_format' in model_parameters and model_parameters['response_format'] in ['JSON', 'XML']:
+            stop = stop or []
+            self._transform_chat_json_prompts(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                tools=tools,
+                stop=stop,
+                stream=stream,
+                user=user,
+                response_format=model_parameters['response_format']
+            )
+            model_parameters.pop('response_format')
+
+            return self._invoke(
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                tools=tools,
+                stop=stop,
+                stream=stream,
+                user=user
+            )
+
+    def _transform_chat_json_prompts(self, model: str, credentials: dict,
+                                     prompt_messages: list[PromptMessage], model_parameters: dict,
+                                     tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
+                                     stream: bool = True, user: str | None = None, response_format: str = 'JSON') -> None:
+        """
+        Transform json prompts 
+        """
+        if stop is None:
+            stop = []
+        if "```\n" not in stop:
+            stop.append("```\n")
+        if "\n```" not in stop:
+            stop.append("\n```")
+
+        if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
+            prompt_messages[0] = SystemPromptMessage(
+                content=UPSTAGE_BLOCK_MODE_PROMPT
+                    .replace("{{instructions}}", prompt_messages[0].content)
+                    .replace("{{block}}", response_format)
+            )
+            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n"))
+        else:
+            prompt_messages.insert(0, SystemPromptMessage(
+                                   content=UPSTAGE_BLOCK_MODE_PROMPT
+                                       .replace("{{instructions}}", f"Please output a valid {response_format} object.")
+                                       .replace("{{block}}", response_format)
+            ))
+            prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
+
+    def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None) -> int:
+        """
+        Get number of tokens for given prompt messages
+
+        :param model: model name
+        :param credentials: model credentials
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return:
+        """
+        return self._num_tokens_from_messages(model, prompt_messages, tools)
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            credentials_kwargs = self._to_credential_kwargs(credentials)
+            client = OpenAI(**credentials_kwargs)
+
+            client.chat.completions.create(
+                messages=[{"role": "user", "content": "ping"}],
+                model=model,
+                temperature=0,
+                max_tokens=10,
+                stream=False
+            )
+        except Exception as e:
+            raise CredentialsValidateFailedError(str(e))
+
+    def _chat_generate(self, model: str, credentials: dict,
+                       prompt_messages: list[PromptMessage], model_parameters: dict,
+                       tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+                       stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        client = OpenAI(**credentials_kwargs)
+
+        extra_model_kwargs = {}
+
+        if tools:
+            extra_model_kwargs["functions"] = [{
+                "name": tool.name,
+                "description": tool.description,
+                "parameters": tool.parameters
+            } for tool in tools]
+
+        if stop:
+            extra_model_kwargs["stop"] = stop
+
+        if user:
+            extra_model_kwargs["user"] = user
+
+        # chat model
+        response = client.chat.completions.create(
+            messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
+            model=model,
+            stream=stream,
+            **model_parameters,
+            **extra_model_kwargs,
+        )
+
+        if stream:
+            return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
+        return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
+        
+    def _handle_chat_generate_response(self, model: str, credentials: dict, response: ChatCompletion,
+                                       prompt_messages: list[PromptMessage],
+                                       tools: Optional[list[PromptMessageTool]] = None) -> LLMResult:
+        """
+        Handle llm chat response
+
+        :param model: model name
+        :param credentials: credentials
+        :param response: response
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return: llm response
+        """
+        assistant_message = response.choices[0].message
+        # assistant_message_tool_calls = assistant_message.tool_calls
+        assistant_message_function_call = assistant_message.function_call
+
+        # extract tool calls from response
+        # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+        function_call = self._extract_response_function_call(assistant_message_function_call)
+        tool_calls = [function_call] if function_call else []
+
+        # transform assistant message to prompt message
+        assistant_prompt_message = AssistantPromptMessage(
+            content=assistant_message.content,
+            tool_calls=tool_calls
+        )
+
+        # calculate num tokens
+        if response.usage:
+            # transform usage
+            prompt_tokens = response.usage.prompt_tokens
+            completion_tokens = response.usage.completion_tokens
+        else:
+            # calculate num tokens
+            prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+            completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message])
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+        # transform response
+        response = LLMResult(
+            model=response.model,
+            prompt_messages=prompt_messages,
+            message=assistant_prompt_message,
+            usage=usage,
+            system_fingerprint=response.system_fingerprint,
+        )
+
+        return response
+
+    def _handle_chat_generate_stream_response(self, model: str, credentials: dict, response: Stream[ChatCompletionChunk],
+                                              prompt_messages: list[PromptMessage],
+                                              tools: Optional[list[PromptMessageTool]] = None) -> Generator:
+        """
+        Handle llm chat stream response
+
+        :param model: model name
+        :param response: response
+        :param prompt_messages: prompt messages
+        :param tools: tools for tool calling
+        :return: llm response chunk generator
+        """
+        full_assistant_content = ''
+        delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None
+        prompt_tokens = 0
+        completion_tokens = 0
+        final_tool_calls = []
+        final_chunk = LLMResultChunk(
+            model=model,
+            prompt_messages=prompt_messages,
+            delta=LLMResultChunkDelta(
+                index=0,
+                message=AssistantPromptMessage(content=''),
+            )
+        )
+
+        for chunk in response:
+            if len(chunk.choices) == 0:
+                if chunk.usage:
+                    # calculate num tokens
+                    prompt_tokens = chunk.usage.prompt_tokens
+                    completion_tokens = chunk.usage.completion_tokens
+                continue
+
+            delta = chunk.choices[0]
+            has_finish_reason = delta.finish_reason is not None
+
+            if not has_finish_reason and (delta.delta.content is None or delta.delta.content == '') and \
+                delta.delta.function_call is None:
+                continue
+
+            # assistant_message_tool_calls = delta.delta.tool_calls
+            assistant_message_function_call = delta.delta.function_call
+
+            # extract tool calls from response
+            if delta_assistant_message_function_call_storage is not None:
+                # handle process of stream function call
+                if assistant_message_function_call:
+                    # message has not ended yet
+                    delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments
+                    continue
+                else:
+                    # message has ended
+                    assistant_message_function_call = delta_assistant_message_function_call_storage
+                    delta_assistant_message_function_call_storage = None
+            else:
+                if assistant_message_function_call:
+                    # start of stream function call
+                    delta_assistant_message_function_call_storage = assistant_message_function_call
+                    if delta_assistant_message_function_call_storage.arguments is None:
+                        delta_assistant_message_function_call_storage.arguments = ''
+                    if not has_finish_reason:
+                        continue
+
+            # tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
+            function_call = self._extract_response_function_call(assistant_message_function_call)
+            tool_calls = [function_call] if function_call else []
+            if tool_calls:
+                final_tool_calls.extend(tool_calls)
+
+            # transform assistant message to prompt message
+            assistant_prompt_message = AssistantPromptMessage(
+                content=delta.delta.content if delta.delta.content else '',
+                tool_calls=tool_calls
+            )
+
+            full_assistant_content += delta.delta.content if delta.delta.content else ''
+
+            if has_finish_reason:
+                final_chunk = LLMResultChunk(
+                    model=chunk.model,
+                    prompt_messages=prompt_messages,
+                    system_fingerprint=chunk.system_fingerprint,
+                    delta=LLMResultChunkDelta(
+                        index=delta.index,
+                        message=assistant_prompt_message,
+                        finish_reason=delta.finish_reason,
+                    )
+                )
+            else:
+                yield LLMResultChunk(
+                    model=chunk.model,
+                    prompt_messages=prompt_messages,
+                    system_fingerprint=chunk.system_fingerprint,
+                    delta=LLMResultChunkDelta(
+                        index=delta.index,
+                        message=assistant_prompt_message,
+                    )
+                )
+
+        if not prompt_tokens:
+            prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
+
+        if not completion_tokens:
+            full_assistant_prompt_message = AssistantPromptMessage(
+                content=full_assistant_content,
+                tool_calls=final_tool_calls
+            )
+            completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message])
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+        final_chunk.delta.usage = usage
+
+        yield final_chunk
+
+    def _extract_response_tool_calls(self,
+                                     response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]) \
+            -> list[AssistantPromptMessage.ToolCall]:
+        """
+        Extract tool calls from response
+
+        :param response_tool_calls: response tool calls
+        :return: list of tool calls
+        """
+        tool_calls = []
+        if response_tool_calls:
+            for response_tool_call in response_tool_calls:
+                function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+                    name=response_tool_call.function.name,
+                    arguments=response_tool_call.function.arguments
+                )
+
+                tool_call = AssistantPromptMessage.ToolCall(
+                    id=response_tool_call.id,
+                    type=response_tool_call.type,
+                    function=function
+                )
+                tool_calls.append(tool_call)
+
+        return tool_calls
+
+    def _extract_response_function_call(self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall) \
+            -> AssistantPromptMessage.ToolCall:
+        """
+        Extract function call from response
+
+        :param response_function_call: response function call
+        :return: tool call
+        """
+        tool_call = None
+        if response_function_call:
+            function = AssistantPromptMessage.ToolCall.ToolCallFunction(
+                name=response_function_call.name,
+                arguments=response_function_call.arguments
+            )
+
+            tool_call = AssistantPromptMessage.ToolCall(
+                id=response_function_call.name,
+                type="function",
+                function=function
+            )
+
+        return tool_call
+
+    def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
+        """
+        Convert PromptMessage to dict for Upstage API
+        """
+        if isinstance(message, UserPromptMessage):
+            message = cast(UserPromptMessage, message)
+            if isinstance(message.content, str):
+                message_dict = {"role": "user", "content": message.content}
+            else:
+                sub_messages = []
+                for message_content in message.content:
+                    if message_content.type == PromptMessageContentType.TEXT:
+                        message_content = cast(TextPromptMessageContent, message_content)
+                        sub_message_dict = {
+                            "type": "text",
+                            "text": message_content.data
+                        }
+                        sub_messages.append(sub_message_dict)
+                    elif message_content.type == PromptMessageContentType.IMAGE:
+                        message_content = cast(ImagePromptMessageContent, message_content)
+                        sub_message_dict = {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": message_content.data,
+                                "detail": message_content.detail.value
+                            }
+                        }
+                        sub_messages.append(sub_message_dict)
+
+                message_dict = {"role": "user", "content": sub_messages}
+        elif isinstance(message, AssistantPromptMessage):
+            message = cast(AssistantPromptMessage, message)
+            message_dict = {"role": "assistant", "content": message.content}
+            if message.tool_calls:
+                # message_dict["tool_calls"] = [tool_call.dict() for tool_call in
+                #                               message.tool_calls]
+                function_call = message.tool_calls[0]
+                message_dict["function_call"] = {
+                    "name": function_call.function.name,
+                    "arguments": function_call.function.arguments,
+                }
+        elif isinstance(message, SystemPromptMessage):
+            message = cast(SystemPromptMessage, message)
+            message_dict = {"role": "system", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            message = cast(ToolPromptMessage, message)
+            # message_dict = {
+            #     "role": "tool",
+            #     "content": message.content,
+            #     "tool_call_id": message.tool_call_id
+            # }
+            message_dict = {
+                "role": "function",
+                "content": message.content,
+                "name": message.tool_call_id
+            }
+        else:
+            raise ValueError(f"Got unknown type {message}")
+
+        if message.name:
+            message_dict["name"] = message.name
+
+        return message_dict
+
+    def _get_tokenizer(self) -> Tokenizer:
+        return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
+
+    def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
+                                  tools: Optional[list[PromptMessageTool]] = None) -> int:
+        """
+        Calculate the number of tokens for Solar using the Hugging Face Solar tokenizer.
+        The Solar tokenizer is publicly available at https://huggingface.co/upstage/solar-1-mini-tokenizer
+        """
+        tokenizer = self._get_tokenizer()
+        tokens_per_message = 5 # <|im_start|>{role}\n{message}<|im_end|>
+        tokens_prefix = 1 # <|startoftext|>
+        tokens_suffix = 3 # <|im_start|>assistant\n
+
+        num_tokens = 0
+        num_tokens += tokens_prefix
+
+        messages_dict = [self._convert_prompt_message_to_dict(message) for message in messages]
+        for message in messages_dict:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                if isinstance(value, list):
+                    text = ''
+                    for item in value:
+                        if isinstance(item, dict) and item['type'] == 'text':
+                            text += item['text']
+                    value = text
+
+                if key == "tool_calls":
+                    for tool_call in value:
+                        for t_key, t_value in tool_call.items():
+                            num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
+                            if t_key == "function":
+                                for f_key, f_value in t_value.items():
+                                    num_tokens += len(tokenizer.encode(f_key, add_special_tokens=False))
+                                    num_tokens += len(tokenizer.encode(f_value, add_special_tokens=False))
+                            else:
+                                num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
+                                num_tokens += len(tokenizer.encode(t_value, add_special_tokens=False))
+                else:
+                    num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False))
+        num_tokens += tokens_suffix
+
+        if tools:
+            num_tokens += self._num_tokens_for_tools(tokenizer, tools)
+
+        return num_tokens
+
+    def _num_tokens_for_tools(self, tokenizer: Tokenizer, tools: list[PromptMessageTool]) -> int:
+        """
+        Calculate num tokens for tool calling with upstage tokenizer.
+
+        :param tokenizer: huggingface tokenizer
+        :param tools: tools for tool calling
+        :return: number of tokens
+        """
+        num_tokens = 0
+        for tool in tools:
+            num_tokens += len(tokenizer.encode('type'))
+            num_tokens += len(tokenizer.encode('function'))
+
+            # calculate num tokens for function object
+            num_tokens += len(tokenizer.encode('name'))
+            num_tokens += len(tokenizer.encode(tool.name))
+            num_tokens += len(tokenizer.encode('description'))
+            num_tokens += len(tokenizer.encode(tool.description))
+            parameters = tool.parameters
+            num_tokens += len(tokenizer.encode('parameters'))
+            if 'title' in parameters:
+                num_tokens += len(tokenizer.encode('title'))
+                num_tokens += len(tokenizer.encode(parameters.get("title")))
+            num_tokens += len(tokenizer.encode('type'))
+            num_tokens += len(tokenizer.encode(parameters.get("type")))
+            if 'properties' in parameters:
+                num_tokens += len(tokenizer.encode('properties'))
+                for key, value in parameters.get('properties').items():
+                    num_tokens += len(tokenizer.encode(key))
+                    for field_key, field_value in value.items():
+                        num_tokens += len(tokenizer.encode(field_key))
+                        if field_key == 'enum':
+                            for enum_field in field_value:
+                                num_tokens += 3
+                                num_tokens += len(tokenizer.encode(enum_field))
+                        else:
+                            num_tokens += len(tokenizer.encode(field_key))
+                            num_tokens += len(tokenizer.encode(str(field_value)))
+            if 'required' in parameters:
+                num_tokens += len(tokenizer.encode('required'))
+                for required_field in parameters['required']:
+                    num_tokens += 3
+                    num_tokens += len(tokenizer.encode(required_field))
+
+        return num_tokens
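
As a rough, self-contained sketch of the counting scheme above (assumes the tokenizers package and access to the Hugging Face Hub; not part of the commit): a fixed prefix, a per-message overhead, the tokenized content, and a fixed assistant-prompt suffix.

    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
    # Mirrors _num_tokens_from_messages for a single user message:
    # 1 <|startoftext|> prefix + 5 per-message tokens + content tokens + 3 suffix tokens.
    content_tokens = len(tokenizer.encode("Hello World!", add_special_tokens=False).ids)
    print(1 + 5 + content_tokens + 3)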

+ 43 - 0
api/core/model_runtime/model_providers/upstage/llm/solar-1-mini-chat.yaml

@@ -0,0 +1,43 @@
+model: solar-1-mini-chat
+label:
+  zh_Hans: solar-1-mini-chat
+  en_US: solar-1-mini-chat
+  ko_KR: solar-1-mini-chat
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 32768
+  - name: seed
+    label:
+      zh_Hans: 种子
+      en_US: Seed
+    type: int
+    help:
+      zh_Hans:
+        如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
+        响应参数来监视变化。
+      en_US:
+        If specified, model will make a best effort to sample deterministically,
+        such that repeated requests with the same seed and parameters should return
+        the same result. Determinism is not guaranteed, and you should refer to the
+        system_fingerprint response parameter to monitor changes in the backend.
+    required: false
+pricing:
+  input: "0.5"
+  output: "0.5"
+  unit: "0.000001"
+  currency: USD

+ 0 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/__init__.py


+ 9 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-passage.yaml

@@ -0,0 +1,9 @@
+model: solar-embedding-1-large-passage
+model_type: text-embedding
+model_properties:
+  context_size: 4000
+  max_chunks: 32
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: 'USD'

+ 9 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/solar-embedding-1-large-query.yaml

@@ -0,0 +1,9 @@
+model: solar-embedding-1-large-query
+model_type: text-embedding
+model_properties:
+  context_size: 4000
+  max_chunks: 32
+pricing:
+  input: '0.1'
+  unit: '0.000001'
+  currency: 'USD'

+ 195 - 0
api/core/model_runtime/model_providers/upstage/text_embedding/text_embedding.py

@@ -0,0 +1,195 @@
+import base64
+import time
+from collections.abc import Mapping
+from typing import Union
+
+import numpy as np
+from openai import OpenAI
+from tokenizers import Tokenizer
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+from core.model_runtime.model_providers.upstage._common import _CommonUpstage
+
+
+class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
+    """
+    Model class for Upstage text embedding model.
+    """
+    def _get_tokenizer(self) -> Tokenizer:
+        return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
+
+    def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
+        """
+        Invoke text embedding model
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :param user: unique user id
+        :return: embeddings result
+        """
+
+        credentials_kwargs = self._to_credential_kwargs(credentials)
+        client = OpenAI(**credentials_kwargs)
+
+        extra_model_kwargs = {}
+        if user:
+            extra_model_kwargs["user"] = user
+        extra_model_kwargs["encoding_format"] = "base64"
+
+        context_size = self._get_context_size(model, credentials)
+        max_chunks = self._get_max_chunks(model, credentials)
+
+        embeddings: list[list[float]] = [[] for _ in range(len(texts))]
+        tokens = []
+        indices = []
+        used_tokens = 0
+
+        tokenizer = self._get_tokenizer()
+
+        for i, text in enumerate(texts):
+            token = tokenizer.encode(text, add_special_tokens=False).tokens
+            for j in range(0, len(token), context_size):
+                tokens += [token[j:j+context_size]]
+                indices += [i]
+        
+        batched_embeddings = []
+        _iter = range(0, len(tokens), max_chunks)
+
+        for i in _iter:
+            embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                model=model,
+                client=client,
+                texts=tokens[i:i+max_chunks],
+                extra_model_kwargs=extra_model_kwargs,
+            )
+
+            used_tokens += embedding_used_tokens
+            batched_embeddings += embeddings_batch
+        
+        results: list[list[list[float]]] = [[] for _ in range(len(texts))]
+        num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
+
+        for i in range(len(indices)):
+            results[indices[i]].append(batched_embeddings[i])
+            num_tokens_in_batch[indices[i]].append(len(tokens[i]))
+        
+        for i in range(len(texts)):
+            _result = results[i]
+            if len(_result) == 0:
+                embeddings_batch, embedding_used_tokens = self._embedding_invoke(
+                    model=model,
+                    client=client,
+                    texts=[texts[i]],
+                    extra_model_kwargs=extra_model_kwargs,
+                )
+                used_tokens += embedding_used_tokens
+                average = embeddings_batch[0]
+            else:
+                average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
+            embeddings[i] = (average / np.linalg.norm(average)).tolist()
+        
+        usage = self._calc_response_usage(
+            model=model,
+            credentials=credentials,
+            tokens=used_tokens
+        )
+
+        return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
+    
+    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+        """
+        Get number of tokens for given prompt messages
+
+        :param model: model name
+        :param credentials: model credentials
+        :param texts: texts to embed
+        :return:
+        """
+        if len(texts) == 0:
+            return 0
+
+        tokenizer = self._get_tokenizer()
+
+        total_num_tokens = 0
+        for text in texts:
+            # calculate the number of tokens in the encoded text
+            tokenized_text = tokenizer.encode(text)
+            total_num_tokens += len(tokenized_text)
+
+        return total_num_tokens
+    
+    def validate_credentials(self, model: str, credentials: Mapping) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :return:
+        """
+        try:
+            # transform credentials to kwargs for model instance
+            credentials_kwargs = self._to_credential_kwargs(credentials)
+            client = OpenAI(**credentials_kwargs)
+
+            # call embedding model
+            self._embedding_invoke(
+                model=model,
+                client=client,
+                texts=['ping'],
+                extra_model_kwargs={}
+            )
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex))
+    
+    def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict) -> tuple[list[list[float]], int]:
+        """
+        Invoke embedding model
+        :param model: model name
+        :param client: model client
+        :param texts: texts to embed
+        :param extra_model_kwargs: extra model kwargs
+        :return: embeddings and used tokens
+        """
+        response = client.embeddings.create(
+            model=model,
+            input=texts,
+            **extra_model_kwargs
+        )
+
+        if 'encoding_format' in extra_model_kwargs and extra_model_kwargs['encoding_format'] == 'base64':
+            return ([list(np.frombuffer(base64.b64decode(embedding.embedding), dtype=np.float32)) for embedding in response.data], response.usage.total_tokens)
+        
+        return [data.embedding for data in response.data], response.usage.total_tokens
+    
+    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+        """
+        Calculate response usage
+
+        :param model: model name
+        :param credentials: model credentials
+        :param tokens: input tokens
+        :return: usage
+        """
+        input_price_info = self.get_price(
+            model=model,
+            credentials=credentials,
+            tokens=tokens,
+            price_type=PriceType.INPUT
+        )
+
+        usage = EmbeddingUsage(
+            tokens=tokens,
+            total_tokens=tokens,
+            unit_price=input_price_info.unit_price,
+            price_unit=input_price_info.unit,
+            total_price=input_price_info.total_amount,
+            currency=input_price_info.currency,
+            latency=time.perf_counter() - self.started_at
+        )
+
+        return usage
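
For clarity, an illustrative sketch of the base64 path in _embedding_invoke (fabricated values, not part of the commit): with encoding_format="base64" each embedding arrives as a base64 string of packed float32 values, which is why the code decodes with np.frombuffer and why _invoke L2-normalizes the result.

    import base64

    import numpy as np

    # Fabricated payload standing in for response.data[i].embedding when encoding_format="base64".
    payload = base64.b64encode(np.array([0.1, 0.2, 0.3], dtype=np.float32).tobytes()).decode()
    vector = np.frombuffer(base64.b64decode(payload), dtype=np.float32)
    unit_vector = (vector / np.linalg.norm(vector)).tolist()  # same normalization as in _invoke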

+ 32 - 0
api/core/model_runtime/model_providers/upstage/upstage.py

@@ -0,0 +1,32 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class UpstageProvider(ModelProvider):
+    
+    def validate_provider_credentials(self, credentials: dict) -> None:
+        """
+        Validate provider credentials
+        if validate failed, raise exception
+
+        :param credentials: provider credentials, as defined in `provider_credential_schema`.
+        """
+        try:
+            model_instance = self.get_model_instance(ModelType.LLM)
+
+            model_instance.validate_credentials(
+                model="solar-1-mini-chat",
+                credentials=credentials
+            )
+        except CredentialsValidateFailedError as e:
+            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+            raise e
+        except Exception as e:
+            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+            raise e
+                

+ 49 - 0
api/core/model_runtime/model_providers/upstage/upstage.yaml

@@ -0,0 +1,49 @@
+provider: upstage
+label:
+  en_US: Upstage
+description:
+  en_US: Models provided by Upstage, such as Solar-1-mini-chat.
+  zh_Hans: Upstage 提供的模型,例如 Solar-1-mini-chat.
+icon_small:
+  en_US: icon_s_en.svg
+icon_large:
+  en_US: icon_l_en.svg
+background: "#FFFFFF"
+help:
+  title:
+    en_US: Get your API Key from Upstage
+    zh_Hans: 从 Upstage 获取 API Key
+  url:
+    en_US: https://console.upstage.ai/api-keys
+supported_model_types:
+  - llm
+  - text-embedding
+configurate_methods:
+  - predefined-model
+model_credential_schema:
+  model:
+    label:
+      en_US: Model Name
+      zh_Hans: 模型名称
+    placeholder:
+      en_US: Enter your model name
+      zh_Hans: 输入模型名称
+  credential_form_schemas:
+    - variable: upstage_api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
+provider_credential_schema:
+  credential_form_schemas:
+    - variable: upstage_api_key
+      label:
+        en_US: API Key
+      type: secret-input
+      required: true
+      placeholder:
+        zh_Hans: 在此输入您的 API Key
+        en_US: Enter your API Key
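
A short usage sketch (placeholder key, not part of the commit): the upstage_api_key variable declared in the credential schemas above is the key expected in the credentials dict passed to the provider and model classes.

    from core.model_runtime.model_providers.upstage.upstage import UpstageProvider

    # Raises CredentialsValidateFailedError if the key is rejected by the solar-1-mini-chat endpoint.
    UpstageProvider().validate_provider_credentials(credentials={"upstage_api_key": "up-xxxxxxxxxxxxxxxx"})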

+ 1 - 1
api/docker/entrypoint.sh

@@ -4,7 +4,7 @@ set -e
 
 if [[ "${MIGRATION_ENABLED}" == "true" ]]; then
   echo "Running migrations"
-  flask upgrade-db
+  flask db upgrade
 fi
 
 if [[ "${MODE}" == "worker" ]]; then

+ 1 - 0
api/pyproject.toml

@@ -73,6 +73,7 @@ quote-style = "single"
 
 [tool.pytest_env]
 OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
+UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
 AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
 AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
 ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"

+ 0 - 0
api/tests/integration_tests/model_runtime/upstage/__init__.py


+ 245 - 0
api/tests/integration_tests/model_runtime/upstage/test_llm.py

@@ -0,0 +1,245 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
+from core.model_runtime.model_providers.upstage.llm.llm import UpstageLargeLanguageModel
+
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+def test_predefined_models():
+    model = UpstageLargeLanguageModel()
+    model_schemas = model.predefined_models()
+
+    assert len(model_schemas) >= 1
+    assert isinstance(model_schemas[0], AIModelEntity)
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        # model name to gpt-3.5-turbo because of mocking
+        model.validate_credentials(
+            model='gpt-3.5-turbo',
+            credentials={
+                'upstage_api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        }
+    )
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    result = model.invoke(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'top_p': 1.0,
+            'presence_penalty': 0.0,
+            'frequency_penalty': 0.0,
+            'max_tokens': 10
+        },
+        stop=['How'],
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_chat_model_with_tools(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    result = model.invoke(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content="what's the weather today in London?",
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        tools=[
+            PromptMessageTool(
+                name='get_weather',
+                description='Determine weather in my location',
+                parameters={
+                    "type": "object",
+                    "properties": {
+                      "location": {
+                        "type": "string",
+                        "description": "The city and state e.g. San Francisco, CA"
+                      },
+                      "unit": {
+                        "type": "string",
+                        "enum": [
+                          "c",
+                          "f"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "location"
+                    ]
+                  }
+            ),
+            PromptMessageTool(
+                name='get_stock_price',
+                description='Get the current stock price',
+                parameters={
+                    "type": "object",
+                    "properties": {
+                      "symbol": {
+                        "type": "string",
+                        "description": "The stock symbol"
+                      }
+                    },
+                    "required": [
+                      "symbol"
+                    ]
+                  }
+            )
+        ],
+        stream=False,
+        user="abc-123"
+    )
+
+    assert isinstance(result, LLMResult)
+    assert isinstance(result.message, AssistantPromptMessage)
+    assert len(result.message.tool_calls) > 0
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_invoke_stream_chat_model(setup_openai_mock):
+    model = UpstageLargeLanguageModel()
+
+    result = model.invoke(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        model_parameters={
+            'temperature': 0.0,
+            'max_tokens': 100
+        },
+        stream=True,
+        user="abc-123"
+    )
+
+    assert isinstance(result, Generator)
+
+    for chunk in result:
+        assert isinstance(chunk, LLMResultChunk)
+        assert isinstance(chunk.delta, LLMResultChunkDelta)
+        assert isinstance(chunk.delta.message, AssistantPromptMessage)
+        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+        if chunk.delta.finish_reason is not None:
+            assert chunk.delta.usage is not None
+            assert chunk.delta.usage.completion_tokens > 0
+
+
+def test_get_num_tokens():
+    model = UpstageLargeLanguageModel()
+
+    num_tokens = model.get_num_tokens(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ]
+    )
+
+    assert num_tokens == 13
+
+    num_tokens = model.get_num_tokens(
+        model='solar-1-mini-chat',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        },
+        prompt_messages=[
+            SystemPromptMessage(
+                content='You are a helpful AI assistant.',
+            ),
+            UserPromptMessage(
+                content='Hello World!'
+            )
+        ],
+        tools=[
+            PromptMessageTool(
+                name='get_weather',
+                description='Determine weather in my location',
+                parameters={
+                    "type": "object",
+                    "properties": {
+                      "location": {
+                        "type": "string",
+                        "description": "The city and state e.g. San Francisco, CA"
+                      },
+                      "unit": {
+                        "type": "string",
+                        "enum": [
+                          "c",
+                          "f"
+                        ]
+                      }
+                    },
+                    "required": [
+                      "location"
+                    ]
+                }
+            ),
+        ]
+    )
+
+    assert num_tokens == 106

+ 23 - 0
api/tests/integration_tests/model_runtime/upstage/test_provider.py

@@ -0,0 +1,23 @@
+import os
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.upstage.upstage import UpstageProvider
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
+def test_validate_provider_credentials(setup_openai_mock):
+    provider = UpstageProvider()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        provider.validate_provider_credentials(
+            credentials={}
+        )
+
+    provider.validate_provider_credentials(
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        }
+    )

+ 67 - 0
api/tests/integration_tests/model_runtime/upstage/test_text_embedding.py

@@ -0,0 +1,67 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.upstage.text_embedding.text_embedding import UpstageTextEmbeddingModel
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+
+
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_validate_credentials(setup_openai_mock):
+    model = UpstageTextEmbeddingModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model='solar-embedding-1-large-passage',
+            credentials={
+                'upstage_api_key': 'invalid_key'
+            }
+        )
+
+    model.validate_credentials(
+        model='solar-embedding-1-large-passage',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
+        }
+    )
+
+@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
+def test_invoke_model(setup_openai_mock):
+    model = UpstageTextEmbeddingModel()
+
+    result = model.invoke(
+        model='solar-embedding-1-large-passage',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
+        },
+        texts=[
+            "hello",
+            "world",
+            " ".join(["long_text"] * 100),
+            " ".join(["another_long_text"] * 100)
+        ],
+        user="abc-123"
+    )
+
+    assert isinstance(result, TextEmbeddingResult)
+    assert len(result.embeddings) == 4
+    assert result.usage.total_tokens == 2
+
+
+def test_get_num_tokens():
+    model = UpstageTextEmbeddingModel()
+
+    num_tokens = model.get_num_tokens(
+        model='solar-embedding-1-large-passage',
+        credentials={
+            'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
+        },
+        texts=[
+            "hello",
+            "world"
+        ]
+    )
+
+    assert num_tokens == 5

+ 3 - 1
dev/pytest/pytest_model_runtime.sh

@@ -5,4 +5,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
   api/tests/integration_tests/model_runtime/azure_openai \
   api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
   api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
-  api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py
+  api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
+  api/tests/integration_tests/model_runtime/upstage
+

Not all files are shown because too many files changed in this diff