7 mesiacov pred · c8b9bdebfe
--- a/api/core/model_runtime/model_providers/xinference/llm/llm.py
+++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -19,7 +19,6 @@ from openai.types.chat.chat_completion_message import FunctionCall
 
				 from openai.types.completion import Completion
			
 
				 from xinference_client.client.restful.restful_client import (
			
 
				     Client,
			
 
				-    RESTfulChatglmCppChatModelHandle,
			
 
				     RESTfulChatModelHandle,
			
 
				     RESTfulGenerateModelHandle,
			
 
				 )
			
@@ -491,7 +490,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
 
				         if tools and len(tools) > 0:
			
 
				             generate_config["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools]
			
 
				         vision = credentials.get("support_vision", False)
			
 
				-        if isinstance(xinference_model, RESTfulChatModelHandle | RESTfulChatglmCppChatModelHandle):
			
 
				+        if isinstance(xinference_model, RESTfulChatModelHandle):
			
 
				             resp = client.chat.completions.create(
			
 
				                 model=credentials["model_uid"],
			
 
				                 messages=[self._convert_prompt_message_to_dict(message) for message in prompt_messages],
			
--- a/api/core/model_runtime/model_providers/xinference/tts/tts.py
+++ b/api/core/model_runtime/model_providers/xinference/tts/tts.py
@@ -208,21 +208,21 @@ class XinferenceText2SpeechModel(TTSModel):
 
				                 executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences)))
			
 
				                 futures = [
			
 
				                     executor.submit(
			
 
				-                        handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=False
			
 
				+                        handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=True
			
 
				                     )
			
 
				                     for i in range(len(sentences))
			
 
				                 ]
			
 
				 
			
 
				                 for future in futures:
			
 
				                     response = future.result()
			
 
				-                    for i in range(0, len(response), 1024):
			
 
				-                        yield response[i : i + 1024]
			
 
				+                    for chunk in response:
			
 
				+                        yield chunk
			
 
				             else:
			
 
				                 response = handle.speech(
			
 
				-                    input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=False
			
 
				+                    input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=True
			
 
				                 )
			
 
				 
			
 
				-                for i in range(0, len(response), 1024):
			
 
				-                    yield response[i : i + 1024]
			
 
				+                for chunk in response:
			
 
				+                    yield chunk
			
 
				         except Exception as ex:
			
 
				             raise InvokeBadRequestError(str(ex))
			
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -10014,13 +10014,13 @@ h11 = ">=0.9.0,<1"
 
				 
			
 
				 [[package]]
			
 
				 name = "xinference-client"
			
 
				-version = "0.13.3"
			
 
				+version = "0.15.2"
			
 
				 description = "Client for Xinference"
			
 
				 optional = false
			
 
				 python-versions = "*"
			
 
				 files = [
			
 
				-    {file = "xinference-client-0.13.3.tar.gz", hash = "sha256:822b722100affdff049c27760be7d62ac92de58c87a40d3361066df446ba648f"},
			
 
				-    {file = "xinference_client-0.13.3-py3-none-any.whl", hash = "sha256:f0eff3858b1ebcef2129726f82b09259c177e11db466a7ca23def3d4849c419f"},
			
 
				+    {file = "xinference-client-0.15.2.tar.gz", hash = "sha256:5c2259bb133148d1cc9bd2b8ec6eb8b5bbeba7f11d6252959f4e6cd79baa53ed"},
			
 
				+    {file = "xinference_client-0.15.2-py3-none-any.whl", hash = "sha256:b6275adab695e75e75a33e21e0ad212488fc2d5a4d0f693d544c0e78469abbe3"},
			
 
				 ]
			
 
				 
			
 
				 [package.dependencies]
			
@@ -10422,4 +10422,4 @@ cffi = ["cffi (>=1.11)"]
 
				 [metadata]
			
 
				 lock-version = "2.0"
			
 
				 python-versions = ">=3.10,<3.13"
			
 
				-content-hash = "18924ae12a00bde4438a46168bc167ed69613ab1ab0c387f193cd47ac24379b2"
			
 
				+content-hash = "85aa4be7defee8fe6622cf95ba03e81895121502ebf6d666d6ce376ff019fac7"
			
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -203,7 +203,7 @@ transformers = "~4.35.0"
 
				 unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] }
			
 
				 websocket-client = "~1.7.0"
			
 
				 werkzeug = "~3.0.1"
			
 
				-xinference-client = "0.13.3"
			
 
				+xinference-client = "0.15.2"
			
 
				 yarl = "~1.9.4"
			
 
				 zhipuai = "1.0.7"
			
 
				 # Before adding new dependency, consider place it in alphabet order (a-z) and suitable group.
			
--- a/api/tests/integration_tests/model_runtime/__mock/xinference.py
+++ b/api/tests/integration_tests/model_runtime/__mock/xinference.py
@@ -9,7 +9,6 @@ from requests.exceptions import ConnectionError
 
				 from requests.sessions import Session
			
 
				 from xinference_client.client.restful.restful_client import (
			
 
				     Client,
			
 
				-    RESTfulChatglmCppChatModelHandle,
			
 
				     RESTfulChatModelHandle,
			
 
				     RESTfulEmbeddingModelHandle,
			
 
				     RESTfulGenerateModelHandle,
			
@@ -19,9 +18,7 @@ from xinference_client.types import Embedding, EmbeddingData, EmbeddingUsage
 
				 
			
 
				 
			
 
				 class MockXinferenceClass:
			
 
				-    def get_chat_model(
			
 
				-        self: Client, model_uid: str
			
 
				-    ) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
			
 
				+    def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
			
 
				         if not re.match(r"https?:\/\/[^\s\/$.?#].[^\s]*$", self.base_url):
			
 
				             raise RuntimeError("404 Not Found")