Skip to content

Commit 203a627

Browse files
authored
Merge pull request #3028 from sam-s10s/fix/smx-tts-retry
SpeechmaticsTTS - Support for retrying when the TTS API returns a 503 error.
2 parents 2006a64 + 8d21b54 commit 203a627

File tree

2 files changed

+99
-33
lines changed

2 files changed

+99
-33
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1919

2020
## [0.0.94] - 2025-11-10
2121

22+
### Changed
23+
24+
- Added support for retrying `SpeechmaticsTTSService` when it returns a 503
25+
error. The retry limit is set by `max_retries` in `InputParams` (default 5).
26+
2227
### Deprecated
2328

2429
- The `KrispFilter` is deprecated and will be removed in a future version. Use

src/pipecat/services/speechmatics/tts.py

Lines changed: 94 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
"""Speechmatics TTS service integration."""
88

9+
import asyncio
910
from typing import AsyncGenerator, Optional
1011
from urllib.parse import urlencode
1112

@@ -21,6 +22,7 @@
2122
TTSStoppedFrame,
2223
)
2324
from pipecat.services.tts_service import TTSService
25+
from pipecat.utils.network import exponential_backoff_time
2426
from pipecat.utils.tracing.service_decorators import traced_tts
2527

2628
try:
@@ -43,9 +45,13 @@ class SpeechmaticsTTSService(TTSService):
4345
SPEECHMATICS_SAMPLE_RATE = 16000
4446

4547
class InputParams(BaseModel):
46-
"""Optional input parameters for Speechmatics TTS configuration."""
48+
"""Optional input parameters for Speechmatics TTS configuration.
4749
48-
pass
50+
Parameters:
51+
max_retries: Maximum number of retries for TTS requests. Defaults to 5.
52+
"""
53+
54+
max_retries: int = 5
4955

5056
def __init__(
5157
self,
@@ -109,64 +115,119 @@ async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
109115
Yields:
110116
Frame: Audio frames containing the synthesized speech.
111117
"""
118+
# Log the text being synthesized
112119
logger.debug(f"{self}: Generating TTS [{text}]")
113120

121+
# HTTP headers
114122
headers = {
115123
"Authorization": f"Bearer {self._api_key}",
116124
"Content-Type": "application/json",
117125
}
118126

127+
# HTTP payload
119128
payload = {
120129
"text": text,
121130
}
122131

132+
# Complete HTTP URL
123133
url = _get_endpoint_url(self._base_url, self._voice_id, self.sample_rate)
124134

125135
try:
136+
# Start TTS TTFB metrics
126137
await self.start_ttfb_metrics()
127138

128-
async with self._session.post(url, json=payload, headers=headers) as response:
129-
if response.status != 200:
130-
error_message = f"Speechmatics TTS error: HTTP {response.status}"
131-
logger.error(error_message)
132-
yield ErrorFrame(error=error_message)
133-
return
139+
# Track attempt
140+
attempt = 0
141+
142+
# Keep retrying on 503 until we get a different response or reach max_retries
143+
while True:
144+
async with self._session.post(url, json=payload, headers=headers) as response:
145+
"""Evaluate response from TTS service."""
146+
147+
# 503 : Service unavailable
148+
if response.status == 503:
149+
"""Calculate the backoff time and retry."""
150+
151+
try:
152+
# Calculate the backoff time
153+
backoff_time = exponential_backoff_time(
154+
attempt=attempt, min_wait=0.25, max_wait=8.0, multiplier=0.5
155+
)
156+
157+
# Increment attempt
158+
attempt += 1
159+
160+
# Check if we've exceeded the maximum number of attempts
161+
if attempt >= self._params.max_retries:
162+
raise ValueError()
163+
164+
# Report error frame
165+
yield ErrorFrame(
166+
error=f"{self} Service unavailable [503] (attempt {attempt}, retry in {backoff_time:.2f}s)"
167+
)
168+
169+
# Wait before retrying
170+
await asyncio.sleep(backoff_time)
171+
172+
# Retry
173+
continue
174+
175+
except (ValueError, ArithmeticError):
176+
yield ErrorFrame(
177+
error=f"{self} Service unavailable [503] (attempts {attempt})",
178+
fatal=True,
179+
)
180+
return
181+
182+
# != 200 : Error
183+
if response.status != 200:
184+
yield ErrorFrame(
185+
error=f"{self} Service unavailable [{response.status}]", fatal=True
186+
)
187+
return
134188

135-
await self.start_tts_usage_metrics(text)
189+
# Update Pipecat metrics
190+
await self.start_tts_usage_metrics(text)
136191

137-
yield TTSStartedFrame()
192+
# Emit the TTS started frame
193+
yield TTSStartedFrame()
138194

139-
# Process the response in streaming chunks
140-
first_chunk = True
141-
buffer = b""
195+
# Process the response in streaming chunks
196+
first_chunk = True
197+
buffer = b""
142198

143-
async for chunk in response.content.iter_any():
144-
if not chunk:
145-
continue
146-
if first_chunk:
147-
await self.stop_ttfb_metrics()
148-
first_chunk = False
199+
# Iterate over each audio data chunk from the TTS API
200+
async for chunk in response.content.iter_any():
201+
if not chunk:
202+
continue
203+
if first_chunk:
204+
await self.stop_ttfb_metrics()
205+
first_chunk = False
149206

150-
buffer += chunk
207+
buffer += chunk
151208

152-
# Emit all complete 2-byte int16 samples from buffer
153-
if len(buffer) >= 2:
154-
complete_samples = len(buffer) // 2
155-
complete_bytes = complete_samples * 2
209+
# Emit all complete 2-byte int16 samples from buffer
210+
if len(buffer) >= 2:
211+
complete_samples = len(buffer) // 2
212+
complete_bytes = complete_samples * 2
156213

157-
audio_data = buffer[:complete_bytes]
158-
buffer = buffer[complete_bytes:] # Keep remaining bytes for next iteration
214+
audio_data = buffer[:complete_bytes]
215+
buffer = buffer[complete_bytes:]
159216

160-
yield TTSAudioRawFrame(
161-
audio=audio_data,
162-
sample_rate=self.sample_rate,
163-
num_channels=1,
164-
)
217+
# Emit the audio frame
218+
yield TTSAudioRawFrame(
219+
audio=audio_data,
220+
sample_rate=self.sample_rate,
221+
num_channels=1,
222+
)
223+
224+
# Successfully processed the response, break out of retry loop
225+
break
165226

166227
except Exception as e:
167-
logger.exception(f"Error generating TTS: {e}")
168-
yield ErrorFrame(error=f"Speechmatics TTS error: {str(e)}")
228+
yield ErrorFrame(error=f"{self}: Error generating TTS: {e}", fatal=True)
169229
finally:
230+
# Emit the TTS stopped frame
170231
yield TTSStoppedFrame()
171232

172233

0 commit comments

Comments
 (0)