před 1 rokem · c6e2900be7
--- a/api/core/model_runtime/model_providers/__base/tts_model.py
+++ b/api/core/model_runtime/model_providers/__base/tts_model.py
@@ -53,7 +53,7 @@ class TTSModel(AIModel):
 
				         """
			
 
				         raise NotImplementedError
			
 
				 
			
 
				-    def get_tts_model_voices(self, model: str, credentials: dict, language: str) -> list:
			
 
				+    def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list:
			
 
				         """
			
 
				         Get voice for given tts model voices
			
 
				 
			
@@ -66,7 +66,10 @@ class TTSModel(AIModel):
 
				 
			
 
				         if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
			
 
				             voices = model_schema.model_properties[ModelPropertyKey.VOICES]
			
 
				-            return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
			
 
				+            if language:
			
 
				+                return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
			
 
				+            else:
			
 
				+                return [{'name': d['name'], 'value': d['mode']} for d in voices]
			
 
				 
			
 
				     def _get_model_default_voice(self, model: str, credentials: dict) -> any:
			
 
				         """
			
--- a/api/core/model_runtime/model_providers/openai/tts/tts.py
+++ b/api/core/model_runtime/model_providers/openai/tts/tts.py
@@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
 
				         """
			
 
				         # transform credentials to kwargs for model instance
			
 
				         credentials_kwargs = self._to_credential_kwargs(credentials)
			
 
				-        if not voice:
			
 
				+        if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
			
 
				             voice = self._get_model_default_voice(model, credentials)
			
 
				         word_limit = self._get_model_word_limit(model, credentials)
			
 
				         audio_type = self._get_model_audio_type(model, credentials)
			
--- a/api/core/model_runtime/model_providers/tongyi/tts/tts.py
+++ b/api/core/model_runtime/model_providers/tongyi/tts/tts.py
@@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
 
				         :return: text translated to audio file
			
 
				         """
			
 
				         audio_type = self._get_model_audio_type(model, credentials)
			
 
				-        if not voice:
			
 
				+        if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
			
 
				             voice = self._get_model_default_voice(model, credentials)
			
 
				         if streaming:
			
 
				             return Response(stream_with_context(self._tts_invoke_streaming(model=model,
			
--- a/web/app/components/app/configuration/config-vision/param-config.tsx
+++ b/web/app/components/app/configuration/config-vision/param-config.tsx
@@ -3,7 +3,7 @@ import type { FC } from 'react'
 
				 import { memo, useState } from 'react'
			
 
				 import { useTranslation } from 'react-i18next'
			
 
				 import cn from 'classnames'
			
 
				-import ParamConfigContent from './param-config-content'
			
 
				+import VoiceParamConfig from './param-config-content'
			
 
				 import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
			
 
				 import {
			
 
				   PortalToFollowElem,
			
@@ -27,12 +27,12 @@ const ParamsConfig: FC = () => {
 
				       <PortalToFollowElemTrigger onClick={() => setOpen(v => !v)}>
			
 
				         <div className={cn('flex items-center rounded-md h-7 px-3 space-x-1 text-gray-700 cursor-pointer hover:bg-gray-200', open && 'bg-gray-200')}>
			
 
				           <Settings01 className='w-3.5 h-3.5 ' />
			
 
				-          <div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.vision.settings')}</div>
			
 
				+          <div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.voice.settings')}</div>
			
 
				         </div>
			
 
				       </PortalToFollowElemTrigger>
			
 
				       <PortalToFollowElemContent style={{ zIndex: 50 }}>
			
 
				         <div className='w-80 sm:w-[412px] p-4 bg-white rounded-lg border-[0.5px] border-gray-200 shadow-lg space-y-3'>
			
 
				-          <ParamConfigContent />
			
 
				+          <VoiceParamConfig />
			
 
				         </div>
			
 
				       </PortalToFollowElemContent>
			
 
				     </PortalToFollowElem>
			
--- a/web/app/components/app/configuration/config-voice/param-config-content.tsx
+++ b/web/app/components/app/configuration/config-voice/param-config-content.tsx
@@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => {
 
				   const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
			
 
				   const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
			
 
				 
			
 
				-  const voiceItems = useSWR({ url: `/apps/${appId}/text-to-audio/voices?language=${languageItem ? languageItem.value : 'en-US'}` }, fetchAppVoices).data
			
 
				+  const language = languageItem?.value
			
 
				+  const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
			
 
				   const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
			
 
				   const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
			
 
				 
			
--- a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx
+++ b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx
@@ -1,17 +1,27 @@
 
				 'use client'
			
 
				+import useSWR from 'swr'
			
 
				 import React, { type FC } from 'react'
			
 
				 import { useTranslation } from 'react-i18next'
			
 
				 import { useContext } from 'use-context-selector'
			
 
				+import { usePathname } from 'next/navigation'
			
 
				 import Panel from '@/app/components/app/configuration/base/feature-panel'
			
 
				 import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
			
 
				 import ConfigContext from '@/context/debug-configuration'
			
 
				 import { languages } from '@/utils/language'
			
 
				+import { fetchAppVoices } from '@/service/apps'
			
 
				 
			
 
				 const TextToSpeech: FC = () => {
			
 
				   const { t } = useTranslation()
			
 
				   const {
			
 
				     textToSpeechConfig,
			
 
				   } = useContext(ConfigContext)
			
 
				+
			
 
				+  const pathname = usePathname()
			
 
				+  const matched = pathname.match(/\/app\/([^/]+)/)
			
 
				+  const appId = (matched?.length && matched[1]) ? matched[1] : ''
			
 
				+  const language = textToSpeechConfig.language
			
 
				+  const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
			
 
				+  const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
			
 
				   return (
			
 
				     <Panel
			
 
				       title={
			
@@ -22,7 +32,7 @@ const TextToSpeech: FC = () => {
 
				       headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
			
 
				       headerRight={
			
 
				         <div className='text-xs text-gray-500'>
			
 
				-          {languages.find(i => i.value === textToSpeechConfig.language)?.name} {textToSpeechConfig.voice}
			
 
				+          {languages.find(i => i.value === textToSpeechConfig.language)?.name} - {voiceItem?.name ?? t('appDebug.voice.defaultDisplay')}
			
 
				         </div>
			
 
				       }
			
 
				       noBodySpacing
			
--- a/web/i18n/lang/app-debug.en.ts
+++ b/web/i18n/lang/app-debug.en.ts
@@ -300,6 +300,7 @@ const translation = {
 
				   },
			
 
				   voice: {
			
 
				     name: 'Voice',
			
 
				+    defaultDisplay: 'Default Voice',
			
 
				     description: 'Text to speech voice Settings',
			
 
				     settings: 'Settings',
			
 
				     voiceSettings: {
			
--- a/web/i18n/lang/app-debug.pt.ts
+++ b/web/i18n/lang/app-debug.pt.ts
@@ -300,6 +300,7 @@ const translation = {
 
				   },
			
 
				   voice: {
			
 
				     name: 'voz',
			
 
				+    defaultDisplay: 'Voz padrão',
			
 
				     description: 'Texto para configurações de timbre de voz',
			
 
				     settings: 'As configurações',
			
 
				     voiceSettings: {
			
--- a/web/i18n/lang/app-debug.zh.ts
+++ b/web/i18n/lang/app-debug.zh.ts
@@ -296,6 +296,7 @@ const translation = {
 
				   },
			
 
				   voice: {
			
 
				     name: '音色',
			
 
				+    defaultDisplay: '缺省音色',
			
 
				     description: '文本转语音音色设置',
			
 
				     settings: '设置',
			
 
				     voiceSettings: {
			
--- a/web/service/apps.ts
+++ b/web/service/apps.ts
@@ -94,6 +94,6 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u
 
				   return post<GenerationIntroductionResponse>(url, { body })
			
 
				 }
			
 
				 
			
 
				-export const fetchAppVoices: Fetcher<AppVoicesListResponse, { url: string }> = ({ url }) => {
			
 
				-  return get<AppVoicesListResponse>(url)
			
 
				+export const fetchAppVoices: Fetcher<AppVoicesListResponse, { appId: string; language?: string }> = ({ appId, language }) => {
			
 
				+  return get<AppVoicesListResponse>(`apps/${appId}/text-to-audio/voices?language=${language}`)
			
 
				 }