11 月之前 · bb7c62777d
--- a/api/core/model_runtime/model_providers/localai/localai.yaml
+++ b/api/core/model_runtime/model_providers/localai/localai.yaml
@@ -15,6 +15,7 @@ help:
 
				 supported_model_types:
			
 
				   - llm
			
 
				   - text-embedding
			
 
				+  - speech2text
			
 
				 configurate_methods:
			
 
				   - customizable-model
			
 
				 model_credential_schema:
			
@@ -57,6 +58,9 @@ model_credential_schema:
 
				         zh_Hans: 在此输入LocalAI的服务器地址，如 http://192.168.1.100:8080
			
 
				         en_US: Enter the url of your LocalAI, e.g. http://192.168.1.100:8080
			
 
				     - variable: context_size
			
 
				+      show_on:
			
 
				+        - variable: __model_type
			
 
				+          value: llm
			
 
				       label:
			
 
				         zh_Hans: 上下文大小
			
 
				         en_US: Context size
			
--- a/api/core/model_runtime/model_providers/localai/speech2text/__init__.py
+++ b/api/core/model_runtime/model_providers/localai/speech2text/__init__.py
--- a/api/core/model_runtime/model_providers/localai/speech2text/speech2text.py
+++ b/api/core/model_runtime/model_providers/localai/speech2text/speech2text.py
@@ -0,0 +1,101 @@
 
				+from typing import IO, Optional
			
 
				+
			
 
				+from requests import Request, Session
			
 
				+from yarl import URL
			
 
				+
			
 
				+from core.model_runtime.entities.common_entities import I18nObject
			
 
				+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType
			
 
				+from core.model_runtime.errors.invoke import (
			
 
				+    InvokeAuthorizationError,
			
 
				+    InvokeBadRequestError,
			
 
				+    InvokeConnectionError,
			
 
				+    InvokeError,
			
 
				+    InvokeRateLimitError,
			
 
				+    InvokeServerUnavailableError,
			
 
				+)
			
 
				+from core.model_runtime.errors.validate import CredentialsValidateFailedError
			
 
				+from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
			
 
				+
			
 
				+
			
 
				+class LocalAISpeech2text(Speech2TextModel):
			
 
				+    """
			
 
				+    Model class for Local AI Text to speech model.
			
 
				+    """
			
 
				+
			
 
				+    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
			
 
				+        """
			
 
				+        Invoke large language model
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param credentials: model credentials
			
 
				+        :param file: audio file
			
 
				+        :param user: unique user id
			
 
				+        :return: text for given audio file
			
 
				+        """
			
 
				+        
			
 
				+        url = str(URL(credentials['server_url']) / "v1/audio/transcriptions")
			
 
				+        data = {"model": model}
			
 
				+        files = {"file": file}
			
 
				+
			
 
				+        session = Session()
			
 
				+        request = Request("POST", url, data=data, files=files)
			
 
				+        prepared_request = session.prepare_request(request)
			
 
				+        response = session.send(prepared_request)
			
 
				+
			
 
				+        if 'error' in response.json():
			
 
				+            raise InvokeServerUnavailableError("Empty response")
			
 
				+
			
 
				+        return response.json()["text"]
			
 
				+
			
 
				+    def validate_credentials(self, model: str, credentials: dict) -> None:
			
 
				+        """
			
 
				+        Validate model credentials
			
 
				+
			
 
				+        :param model: model name
			
 
				+        :param credentials: model credentials
			
 
				+        :return:
			
 
				+        """
			
 
				+        try:
			
 
				+            audio_file_path = self._get_demo_file_path()
			
 
				+
			
 
				+            with open(audio_file_path, 'rb') as audio_file:
			
 
				+                self._invoke(model, credentials, audio_file)
			
 
				+        except Exception as ex:
			
 
				+            raise CredentialsValidateFailedError(str(ex))
			
 
				+
			
 
				+    @property
			
 
				+    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
			
 
				+        return {
			
 
				+            InvokeConnectionError: [
			
 
				+                InvokeConnectionError
			
 
				+            ],
			
 
				+            InvokeServerUnavailableError: [
			
 
				+                InvokeServerUnavailableError
			
 
				+            ],
			
 
				+            InvokeRateLimitError: [
			
 
				+                InvokeRateLimitError
			
 
				+            ],
			
 
				+            InvokeAuthorizationError: [
			
 
				+                InvokeAuthorizationError
			
 
				+            ],
			
 
				+            InvokeBadRequestError: [
			
 
				+                InvokeBadRequestError
			
 
				+            ],
			
 
				+        }
			
 
				+
			
 
				+    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
			
 
				+        """
			
 
				+            used to define customizable model schema
			
 
				+        """
			
 
				+        entity = AIModelEntity(
			
 
				+            model=model,
			
 
				+            label=I18nObject(
			
 
				+                en_US=model
			
 
				+            ),
			
 
				+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
			
 
				+            model_type=ModelType.SPEECH2TEXT,
			
 
				+            model_properties={},
			
 
				+            parameter_rules=[]
			
 
				+        )
			
 
				+
			
 
				+        return entity
			
--- a/api/tests/integration_tests/model_runtime/localai/test_speech2text.py
+++ b/api/tests/integration_tests/model_runtime/localai/test_speech2text.py
@@ -0,0 +1,54 @@
 
				+import os
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+from core.model_runtime.errors.validate import CredentialsValidateFailedError
			
 
				+from core.model_runtime.model_providers.localai.speech2text.speech2text import LocalAISpeech2text
			
 
				+
			
 
				+
			
 
				+def test_validate_credentials():
			
 
				+    model = LocalAISpeech2text()
			
 
				+
			
 
				+    with pytest.raises(CredentialsValidateFailedError):
			
 
				+        model.validate_credentials(
			
 
				+            model='whisper-1',
			
 
				+            credentials={
			
 
				+                'server_url': 'invalid_url'
			
 
				+            }
			
 
				+        )
			
 
				+
			
 
				+    model.validate_credentials(
			
 
				+        model='whisper-1',
			
 
				+        credentials={
			
 
				+            'server_url': os.environ.get('LOCALAI_SERVER_URL')
			
 
				+        }
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def test_invoke_model():
			
 
				+    model = LocalAISpeech2text()
			
 
				+
			
 
				+    # Get the directory of the current file
			
 
				+    current_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+
			
 
				+    # Get assets directory
			
 
				+    assets_dir = os.path.join(os.path.dirname(current_dir), 'assets')
			
 
				+
			
 
				+    # Construct the path to the audio file
			
 
				+    audio_file_path = os.path.join(assets_dir, 'audio.mp3')
			
 
				+
			
 
				+    # Open the file and get the file object
			
 
				+    with open(audio_file_path, 'rb') as audio_file:
			
 
				+        file = audio_file
			
 
				+
			
 
				+        result = model.invoke(
			
 
				+            model='whisper-1',
			
 
				+            credentials={
			
 
				+                'server_url': os.environ.get('LOCALAI_SERVER_URL')
			
 
				+            },
			
 
				+            file=file,
			
 
				+            user="abc-123"
			
 
				+        )
			
 
				+
			
 
				+        assert isinstance(result, str)
			
 
				+        assert result == '1, 2, 3, 4, 5, 6, 7, 8, 9, 10'