66
77"""Speechmatics TTS service integration."""
88
9+ import asyncio
910from typing import AsyncGenerator , Optional
1011from urllib .parse import urlencode
1112
2122 TTSStoppedFrame ,
2223)
2324from pipecat .services .tts_service import TTSService
25+ from pipecat .utils .network import exponential_backoff_time
2426from pipecat .utils .tracing .service_decorators import traced_tts
2527
2628try :
@@ -43,9 +45,13 @@ class SpeechmaticsTTSService(TTSService):
4345 SPEECHMATICS_SAMPLE_RATE = 16000
4446
class InputParams(BaseModel):
    """Optional input parameters for Speechmatics TTS configuration.

    Parameters:
        max_retries: Maximum number of retries for TTS requests. Consulted by
            ``run_tts`` when the service responds with HTTP 503. Defaults to 5.
    """

    # Upper bound used by the 503 retry loop in ``run_tts``.
    max_retries: int = 5
4955
5056 def __init__ (
5157 self ,
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
    """Generate speech from text using the Speechmatics TTS HTTP API.

    The text is POSTed to the endpoint built by ``_get_endpoint_url`` and the
    response body is streamed back as 16-bit mono audio frames. While the
    service answers HTTP 503 the request is retried with exponential backoff,
    up to ``self._params.max_retries`` retries; each retry is announced with a
    non-fatal ``ErrorFrame``. Exhausted retries, any other non-200 status, or
    an unexpected exception produce a fatal ``ErrorFrame``.

    Args:
        text: The text to synthesize into speech.

    Yields:
        Frame: Audio frames containing the synthesized speech, preceded by a
        ``TTSStartedFrame`` on success. A ``TTSStoppedFrame`` is always
        emitted last, including on the error paths.
    """
    logger.debug(f"{self}: Generating TTS [{text}]")

    headers = {
        "Authorization": f"Bearer {self._api_key}",
        "Content-Type": "application/json",
    }

    payload = {
        "text": text,
    }

    url = _get_endpoint_url(self._base_url, self._voice_id, self.sample_rate)

    try:
        # TTFB is measured from the first attempt, so it includes any backoff.
        await self.start_ttfb_metrics()

        # Number of 503 responses received so far for this request.
        attempt = 0

        while True:
            # Only set when a 503 is going to be retried. The wait itself
            # happens after the response has been released (see below).
            retry_delay: Optional[float] = None

            async with self._session.post(url, json=payload, headers=headers) as response:
                if response.status == 503:
                    # Service unavailable: retry while budget remains.
                    # ``attempt`` counts earlier 503s, so comparing before the
                    # increment allows exactly ``max_retries`` retries.
                    if attempt < self._params.max_retries:
                        try:
                            retry_delay = exponential_backoff_time(
                                attempt=attempt, min_wait=0.25, max_wait=8.0, multiplier=0.5
                            )
                        except (ValueError, ArithmeticError):
                            # No usable delay could be computed; give up.
                            retry_delay = None

                    attempt += 1

                    if retry_delay is None:
                        yield ErrorFrame(
                            error=f"{self} Service unavailable [503] (attempts {attempt})",
                            fatal=True,
                        )
                        return

                elif response.status != 200:
                    # Any other non-success status is not retried.
                    yield ErrorFrame(
                        error=f"{self} Service unavailable [{response.status}]", fatal=True
                    )
                    return

                else:
                    await self.start_tts_usage_metrics(text)

                    yield TTSStartedFrame()

                    # Chunks can end mid-sample; carry the odd trailing byte
                    # over to the next chunk so frames stay int16-aligned.
                    first_chunk = True
                    buffer = b""

                    async for chunk in response.content.iter_any():
                        if not chunk:
                            continue
                        if first_chunk:
                            await self.stop_ttfb_metrics()
                            first_chunk = False

                        buffer += chunk

                        # Emit all complete 2-byte int16 samples from buffer
                        if len(buffer) >= 2:
                            complete_samples = len(buffer) // 2
                            complete_bytes = complete_samples * 2

                            audio_data = buffer[:complete_bytes]
                            buffer = buffer[complete_bytes:]

                            yield TTSAudioRawFrame(
                                audio=audio_data,
                                sample_rate=self.sample_rate,
                                num_channels=1,
                            )

                    # Response fully consumed; leave the retry loop.
                    break

            # Reached only on a retryable 503. The response context has been
            # exited, so the connection is not held during the backoff.
            yield ErrorFrame(
                error=f"{self} Service unavailable [503] (attempt {attempt}, retry in {retry_delay:.2f}s)"
            )
            await asyncio.sleep(retry_delay)

    except Exception as e:
        yield ErrorFrame(error=f"{self}: Error generating TTS: {e}", fatal=True)
    finally:
        # Always close out the utterance, even when no audio was produced.
        yield TTSStoppedFrame()
171232
172233
0 commit comments