|
8 | 8 | - "turbopuffer": Uses TurboPuffer + LangChain with function calling |
9 | 9 |
|
10 | 10 | Flow: |
11 | | -1. Twilio triggers webhook for phone number on /twilio/voice |
12 | | -2. Start a bi directional stream using start.stream which goes to /twilio/media |
13 | | -3. Start a call on Stream's edge network |
14 | | -4. Create a participant for the phone call and join the call |
15 | | -5. Create the AI, and have the AI join the call |
16 | | -
|
17 | | -Notes: Twilio uses ulaw audio encoding at 8kHz. |
18 | | -
|
19 | | -TODO/ to fix: |
20 | | -- Things should prep when creating the call in the voice endpoint |
21 | | -- Auth for stream endpoint |
22 | | -- Ulaw audio bugs |
23 | | -- Frankfurt connection bug |
24 | | -- Study best practices for Gemini RAG |
25 | | -- Study Turbopuffer Rag |
26 | | -- Add an outbound calling example |
27 | | -- See if there is a nicer diff approach to rag indexing |
28 | | -- Write docs about Rag |
| 11 | +1. Twilio triggers webhook on /twilio/voice, which starts preparing the call |
| 12 | +2. Start a bi-directional stream using start.stream which goes to /twilio/media |
| 13 | +3. When the media stream connects, await the prepared call and attach the phone user |
| 14 | +4. Run the agent session until the call ends |
| 15 | +
|
| 16 | +Notes: Twilio uses mulaw audio encoding at 8kHz. |
29 | 17 | """ |
30 | 18 | import asyncio |
31 | 19 | import logging |
32 | 20 | import os |
| 21 | +import traceback |
| 22 | +import uuid |
33 | 23 | from pathlib import Path |
34 | 24 |
|
35 | 25 | import uvicorn |
36 | 26 | from dotenv import load_dotenv |
37 | | -from fastapi import Depends, FastAPI, WebSocket |
38 | | -from getstream.video import rtc |
39 | | -from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import TrackType |
40 | | -from getstream.video.rtc.track_util import PcmData |
41 | | -from getstream.video.rtc.tracks import SubscriptionConfig, TrackSubscriptionConfig |
| 27 | +from fastapi import Depends, FastAPI, Request, WebSocket |
| 28 | +from fastapi.responses import JSONResponse |
42 | 29 |
|
43 | 30 | from vision_agents.core import User, Agent |
44 | 31 | from vision_agents.plugins import getstream, gemini, twilio, elevenlabs, deepgram |
|
62 | 49 | app = FastAPI() |
63 | 50 | call_registry = twilio.TwilioCallRegistry() |
64 | 51 |
|
65 | | -""" |
66 | | -Twilio call webhook points here. Signature is validated and we start the media stream |
67 | | -""" |
| 52 | + |
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log an otherwise-unhandled exception and answer with a generic HTTP 500.

    Args:
        request: The request whose handling raised (unused; required by the
            FastAPI exception-handler signature).
        exc: The exception that escaped the route handler.

    Returns:
        A JSON response with status 500 and a fixed ``detail`` message.
    """
    # Hand the exception object to the logger so the recorded traceback is the
    # one attached to `exc`, independent of whatever exception is "current"
    # when this handler runs. (Assumes `logger` is a stdlib logging.Logger.)
    logger.error("Unhandled exception: %s", exc, exc_info=exc)
    # The server is exposed publicly for Twilio webhooks; returning str(exc)
    # would disclose internal error text to any caller. Details stay in the log.
    return JSONResponse(status_code=500, content={"detail": "Internal server error"})
| 57 | + |
| 58 | + |
@app.post("/twilio/voice")
async def twilio_voice_webhook(
    _: None = Depends(twilio.verify_twilio_signature),
    data: twilio.CallWebhookInput = Depends(twilio.CallWebhookInput.as_form),
):
    """Twilio call webhook. Validates signature and starts the media stream.

    Registers the call under a fresh UUID together with a ``prepare_call``
    coroutine function, then answers Twilio with a media-stream response that
    points at ``/twilio/media/{call_id}/{token}``.

    Args:
        _: Placeholder for the signature-verification dependency.
        data: Form fields of the Twilio voice webhook.

    Returns:
        Whatever ``twilio.create_media_stream_response`` builds for the
        media-stream URL.
    """
    logger.info(f"📞 Call from {data.caller} ({data.caller_city or 'unknown location'})")
    call_id = str(uuid.uuid4())

    async def prepare_call():
        """Create the agent, the phone user and the call, and join the agent.

        Returns:
            Tuple ``(agent, phone_user, stream_call, agent_session)``, which
            the media endpoint unpacks from ``await_prepare()``.
        """
        agent = await create_agent()
        await agent.create_user()

        phone_number = data.from_number or "unknown"
        # Strip "+", spaces and parentheses in a single pass to build the user id.
        sanitized_number = phone_number.translate(str.maketrans("", "", "+ ()"))
        phone_user = User(name=f"Call from {phone_number}", id=f"phone-{sanitized_number}")
        await agent.edge.create_user(user=phone_user)

        stream_call = await agent.create_call("default", call_id=call_id)
        # The caller is attached later by the media endpoint, so do not wait here.
        agent_session = await agent.join(stream_call, wait_for_participant=False)
        return agent, phone_user, stream_call, agent_session

    # NOTE(review): presumably the registry schedules prepare_call right away so
    # setup overlaps with Twilio opening the media stream — confirm in the plugin.
    twilio_call = call_registry.create(call_id, data, prepare=prepare_call)
    url = f"wss://{NGROK_URL}/twilio/media/{call_id}/{twilio_call.token}"
    # The token in the URL is what the media endpoint validates, so treat it as
    # a credential: log only the call id, never the full URL.
    logger.info("twilio redirect to media stream for call %s", call_id)

    return twilio.create_media_stream_response(url)
79 | 86 |
|
80 | 87 |
|
81 | | -""" |
82 | | -Twilio media stream endpoint |
83 | | -""" |
84 | | -@app.websocket("/twilio/media/{call_sid}") |
85 | | -async def media_stream(websocket: WebSocket, call_sid: str): |
@app.websocket("/twilio/media/{call_id}/{token}")
async def media_stream(websocket: WebSocket, call_id: str, token: str):
    """Receive real-time audio stream from Twilio.

    Looks up the call registered by the voice webhook, accepts the WebSocket,
    waits for the prepared call, attaches the phone user to it and runs the
    agent session until the Twilio stream ends.

    Args:
        websocket: The incoming Twilio media-stream connection.
        call_id: Id the voice webhook registered the call under.
        token: Per-call token issued by the registry; checked before anything
            else happens.
    """
    # Validation stays outside the try block on purpose: a request carrying a
    # wrong token must not reach the `finally` that removes the registry entry.
    # NOTE(review): presumably validate() raises on a bad id/token — confirm.
    twilio_call = call_registry.validate(call_id, token)

    try:
        # Accept inside the try so a failed handshake still releases the
        # registry entry instead of leaving it behind.
        twilio_stream = twilio.TwilioMediaStream(websocket)
        await twilio_stream.accept()
        twilio_call.twilio_stream = twilio_stream

        logger.info(f"🔗 Media stream connected for {twilio_call.caller}")

        agent, phone_user, stream_call, agent_session = await twilio_call.await_prepare()
        twilio_call.stream_call = stream_call

        await twilio.attach_phone_to_call(stream_call, twilio_stream, phone_user.id)

        with agent_session:
            await agent.llm.simple_response(
                text="Greet the caller warmly and ask what kind of app they're building. Use your knowledge base to provide relevant product recommendations."
            )
            # Runs for the remainder of the phone call.
            await twilio_stream.run()
    finally:
        call_registry.remove(call_id)
110 | 112 |
|
111 | 113 |
|
112 | | -async def startup_event(): |
| 114 | +async def create_rag_knowledge(): |
113 | 115 | """Initialize the RAG backend based on RAG_BACKEND environment variable.""" |
114 | 116 | global file_search_store, rag |
115 | 117 |
|
@@ -197,38 +199,7 @@ async def search_knowledge(query: str) -> str: |
197 | 199 | ) |
198 | 200 |
|
199 | 201 |
|
200 | | -# ============================================================================= |
201 | | -# Call Handling |
202 | | -# ============================================================================= |
203 | | - |
204 | | - |
205 | | -async def join_call( |
206 | | - agent: Agent, call, twilio_stream: twilio.TwilioMediaStream, phone_user: User |
207 | | -) -> None: |
208 | | - """Join a call and bridge audio between Twilio and Stream.""" |
209 | | - subscription_config = SubscriptionConfig( |
210 | | - default=TrackSubscriptionConfig(track_types=[TrackType.TRACK_TYPE_AUDIO]) |
211 | | - ) |
212 | | - |
213 | | - connection = await rtc.join(call, phone_user.id, subscription_config=subscription_config) |
214 | | - |
215 | | - @connection.on("audio") |
216 | | - async def on_audio_received(pcm: PcmData): |
217 | | - await twilio_stream.send_audio(pcm) |
218 | | - |
219 | | - await connection.__aenter__() |
220 | | - await connection.add_tracks(audio=twilio_stream.audio_track, video=None) |
221 | | - |
222 | | - logger.info(f"{phone_user.name} joined the call, agent is joining next") |
223 | | - |
224 | | - with await agent.join(call): |
225 | | - await agent.llm.simple_response( |
226 | | - text="Greet the caller warmly and ask what kind of app they're building. Use your knowledge base to provide relevant product recommendations." |
227 | | - ) |
228 | | - await twilio_stream.run() |
229 | | - |
230 | | - |
if __name__ == "__main__":
    # Initialize the RAG backend once, before the server accepts any calls.
    rag_setup = create_rag_knowledge()
    asyncio.run(rag_setup)

    logger.info(f"Starting with RAG_BACKEND={RAG_BACKEND}")

    # Serve the Twilio webhook and media-stream endpoints on the local port.
    bind_options = {"host": "localhost", "port": 8000}
    uvicorn.run(app, **bind_options)
0 commit comments