1 rok temu · 397a92f2ee
--- a/api/services/audio_service.py
+++ b/api/services/audio_service.py
@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServ
 
				 from core.llm.whisper import Whisper
			
 
				 from models.provider import ProviderName
			
 
				 
			
 
				-FILE_SIZE_LIMIT = 1 * 1024 * 1024
			
 
				+# Upload cap in megabytes; also interpolated into the "too large" error message.
				+FILE_SIZE = 15
			
 
				+# The same cap in bytes, compared against len(file_content) before transcription.
				+FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
			
 
				+# Audio file extensions; where this list is checked is not visible in this diff.
				 ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
			
 
				 
			
 
				 class AudioService:
			
@@ -23,17 +24,17 @@ class AudioService:
 
				         file_size = len(file_content)
			
 
				 
			
 
				         if file_size > FILE_SIZE_LIMIT:
			
 
				-            message = f"({file_size} > {FILE_SIZE_LIMIT})"
			
 
				+            message = f"Audio size larger than {FILE_SIZE} mb"
			
 
				             raise AudioTooLargeServiceError(message)
			
 
				         
			
 
				         provider_name = LLMBuilder.get_default_provider(tenant_id)
			
 
				         if provider_name != ProviderName.OPENAI.value:
			
 
				-            raise ProviderNotSupportSpeechToTextServiceError('haha')
			
 
				+            raise ProviderNotSupportSpeechToTextServiceError()
			
 
				 
			
 
				         provider_service = LLMProviderService(tenant_id, provider_name)
			
 
				 
			
 
				         buffer = io.BytesIO(file_content)
			
 
				-        buffer.name = 'temp.wav'
			
 
				+        buffer.name = 'temp.mp3'
			
 
				 
			
 
				         return Whisper(provider_service.provider).transcribe(buffer)
			
 
				 
			
--- a/api/services/errors/audio.py
+++ b/api/services/errors/audio.py
@@ -1,23 +1,13 @@
 
				-from services.errors.base import BaseServiceError
			
 
				+class NoAudioUploadedServiceError(Exception):
				+    """Signal a request without an audio file (raise site not shown here -- TODO confirm)."""
			
 
				+    pass
			
 
				 
			
 
				-class NoAudioUploadedServiceError(BaseServiceError):
			
 
				-    error_code = 'no_audio_uploaded'
			
 
				-    description = "Please upload your audio."
			
 
				-    code = 400
			
 
				 
			
 
				+class AudioTooLargeServiceError(Exception):
				+    """Raised by the audio service when len(file_content) exceeds FILE_SIZE_LIMIT."""
			
 
				+    pass
			
 
				 
			
 
				-class AudioTooLargeServiceError(BaseServiceError):
			
 
				-    error_code = 'audio_too_large'
			
 
				-    description = "Audio size exceeded. {message}"
			
 
				-    code = 413
			
 
				 
			
 
				+class UnsupportedAudioTypeServiceError(Exception):
				+    """Presumably raised for uploads outside ALLOWED_EXTENSIONS (raise site not shown -- verify)."""
			
 
				+    pass
			
 
				 
			
 
				-class UnsupportedAudioTypeServiceError(BaseServiceError):
			
 
				-    error_code = 'unsupported_audio_type'
			
 
				-    description = "Audio type not allowed."
			
 
				-    code = 415
			
 
				-
			
 
				-class ProviderNotSupportSpeechToTextServiceError(BaseServiceError):
			
 
				-    error_code = 'provider_not_support_speech_to_text'
			
 
				-    description = "Provider not support speech to text. {message}"
			
 
				-    code = 400
			
 
				+class ProviderNotSupportSpeechToTextServiceError(Exception):
				+    """Raised by the audio service when the tenant's default provider is not OpenAI."""
			
 
				+    pass
			
--- a/web/app/components/base/voice-input/index.tsx
+++ b/web/app/components/base/voice-input/index.tsx
@@ -4,6 +4,7 @@ import { useParams, usePathname } from 'next/navigation'
 
				 import cn from 'classnames'
			
 
				 import Recorder from 'js-audio-recorder'
			
 
				 import { useRafInterval } from 'ahooks'
			
 
				+import { convertToMp3 } from './utils'
			
 
				 import s from './index.module.css'
			
 
				 import { StopCircle } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
			
 
				 import { Loading02, XClose } from '@/app/components/base/icons/src/vender/line/general'
			
@@ -19,7 +20,12 @@ const VoiceInput = ({
 
				   onConverted,
			
 
				 }: VoiceInputTypes) => {
			
 
				   const { t } = useTranslation()
			
 
				-  const recorder = useRef(new Recorder())
			
 
				+  const recorder = useRef(new Recorder({
			
 
				+    sampleBits: 16,
			
 
				+    sampleRate: 16000,
			
 
				+    numChannels: 1,
			
 
				+    compiling: false,
			
 
				+  }))
			
 
				   const canvasRef = useRef<HTMLCanvasElement | null>(null)
			
 
				   const ctxRef = useRef<CanvasRenderingContext2D | null>(null)
			
 
				   const drawRecordId = useRef<number | null>(null)
			
@@ -75,10 +81,10 @@ const VoiceInput = ({
 
				     const canvas = canvasRef.current!
			
 
				     const ctx = ctxRef.current!
			
 
				     ctx.clearRect(0, 0, canvas.width, canvas.height)
			
 
				-    const wavBlob = recorder.current.getWAVBlob()
			
 
				-    const wavFile = new File([wavBlob], 'a.wav', { type: 'audio/wav' })
			
 
				+    const mp3Blob = convertToMp3(recorder.current)
			
 
				+    const mp3File = new File([mp3Blob], 'temp.mp3', { type: 'audio/mp3' })
			
 
				     const formData = new FormData()
			
 
				-    formData.append('file', wavFile)
			
 
				+    formData.append('file', mp3File)
			
 
				 
			
 
				     let url = ''
			
 
				     let isPublic = false
			
--- a/web/app/components/base/voice-input/utils.ts
+++ b/web/app/components/base/voice-input/utils.ts
@@ -0,0 +1,38 @@
 
				+import lamejs from 'lamejs'
			
 
				+
			
 
				+/**
				+ * Encode the PCM audio held by a js-audio-recorder instance into an MP3 blob.
				+ *
				+ * The channel count and sample rate are read from the recorder's WAV header;
				+ * the samples come from `recorder.getChannelData()` and are handed to lamejs
				+ * in fixed-size chunks at a constant 128 kbps.
				+ *
				+ * @param recorder Recorder exposing `getWAV()` and `getChannelData()`.
				+ * @returns Blob of type `audio/mp3` holding the encoded frames.
				+ */
				+export const convertToMp3 = (recorder: any) => {
				+  const wav = lamejs.WavHeader.readHeader(recorder.getWAV())
				+  const { channels, sampleRate } = wav
				+  const mp3enc = new lamejs.Mp3Encoder(channels, sampleRate, 128)
				+  const result = recorder.getChannelData()
				+  const buffer: BlobPart[] = []
				+
				+  // Reinterpret a channel's bytes as 16-bit samples, honouring the view's own
				+  // byteOffset instead of assuming it starts at the beginning of the buffer.
				+  const toInt16 = (view: any) =>
				+    view && new Int16Array(view.buffer, view.byteOffset, view.byteLength / 2)
				+  const leftData = toInt16(result.left)
				+  const rightData = toInt16(result.right)
				+
				+  // Both channels are indexed with the same offset, so the loop is bounded by
				+  // the length of ONE channel; summing left and right only produced extra
				+  // iterations over empty subarrays for stereo input.
				+  const sampleCount = leftData.length
				+
				+  // Samples per encodeBuffer call; the lamejs README suggests a multiple of 576.
				+  const maxSamples = 1152
				+  for (let i = 0; i < sampleCount; i += maxSamples) {
				+    const left = leftData.subarray(i, i + maxSamples)
				+    const mp3buf = channels === 2
				+      ? mp3enc.encodeBuffer(left, rightData.subarray(i, i + maxSamples))
				+      : mp3enc.encodeBuffer(left)
				+
				+    if (mp3buf.length > 0)
				+      buffer.push(mp3buf)
				+  }
				+
				+  // flush() returns whatever the encoder is still holding after the last chunk.
				+  const enc = mp3enc.flush()
				+
				+  if (enc.length > 0)
				+    buffer.push(enc)
				+
				+  return new Blob(buffer, { type: 'audio/mp3' })
				+}
			
--- a/web/global.d.ts
+++ b/web/global.d.ts
@@ -0,0 +1 @@
 
				+// Shorthand ambient declaration: every import from 'lamejs' is typed as `any`
				+// (presumably because the package provides no typings of its own -- verify).
				+declare module 'lamejs';
			
--- a/web/package.json
+++ b/web/package.json
@@ -81,7 +81,8 @@
 
				     "swr": "^2.1.0",
			
 
				     "tailwindcss": "^3.2.7",
			
 
				     "typescript": "4.9.5",
			
 
				-    "use-context-selector": "^1.4.1"
			
 
				+    "use-context-selector": "^1.4.1",
			
 
				+    "lamejs": "1.2.0"
			
 
				   },
			
 
				   "devDependencies": {
			
 
				     "@antfu/eslint-config": "^0.36.0",