"""Deepgram Voice Agent client that routes its LLM calls through the local proxy.

Connects to the Deepgram Voice Agent API via the Python SDK and configures
a custom "think" endpoint pointing at the proxy server. This lets you swap
LLM backends (OpenAI, Bedrock, etc.) without touching the agent code — just
change LLM_PROVIDER in your .env file.

Usage:
    # First start the proxy:
    uvicorn src.proxy:app --port 8080

    # Then run the agent:
    python -m src.agent
"""

import os
import signal
import sys
import threading

from dotenv import load_dotenv

load_dotenv()

from deepgram import DeepgramClient
from deepgram.agent.v1.types.agent_v1settings import AgentV1Settings
from deepgram.agent.v1.types.agent_v1conversation_text import AgentV1ConversationText
from deepgram.agent.v1.types.agent_v1settings_applied import AgentV1SettingsApplied
from deepgram.agent.v1.types.agent_v1welcome import AgentV1Welcome
from deepgram.agent.v1.types.agent_v1agent_thinking import AgentV1AgentThinking
from deepgram.agent.v1.types.agent_v1agent_audio_done import AgentV1AgentAudioDone
from deepgram.agent.v1.types.agent_v1error import AgentV1Error
from deepgram.agent.v1.types.agent_v1function_call_request import AgentV1FunctionCallRequest
from deepgram.agent.v1.types.agent_v1send_function_call_response import AgentV1SendFunctionCallResponse

PROXY_URL = os.environ.get("PROXY_URL", "http://localhost:8080")


def build_agent_settings(proxy_url: str = PROXY_URL) -> AgentV1Settings:
    """Build the Voice Agent settings that point the think endpoint at the proxy.

    The key insight: setting think.endpoint.url to our proxy means every LLM
    call the agent makes goes through the proxy, which routes to whichever
    backend LLM_PROVIDER selects. Changing providers requires zero code changes
    in this file — just update .env.
    """
    return AgentV1Settings(
        type="Settings",
        # ← tags label this request for identification in Deepgram usage reporting
        tags=["deepgram-examples"],
        audio={
            "input": {"encoding": "linear16", "sample_rate": 16000},
            "output": {"encoding": "linear16", "sample_rate": 16000},
        },
        agent={
            "listen": {
                "provider": {"type": "deepgram", "model": "nova-3"},
            },
            "think": {
                "provider": {"type": "open_ai", "model": "proxy"},
                # ← THIS enables custom LLM routing: the Voice Agent sends
                # chat-completions requests to our proxy instead of OpenAI
                "endpoint": {
                    "url": f"{proxy_url}/v1/chat/completions",
                    "headers": {},
                },
                "prompt": (
                    "You are a helpful voice assistant. Keep responses brief "
                    "and conversational — the user is speaking to you, not reading."
                ),
            },
            "speak": {
                "provider": {"type": "deepgram", "model": "aura-2-thalia-en"},
            },
            "greeting": "Hello! I'm your voice assistant powered by Deepgram. How can I help?",
        },
    )

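# For orientation: with the endpoint configured above, the Voice Agent POSTs
# OpenAI-style chat-completions requests to {proxy_url}/v1/chat/completions.
# Roughly (the exact fields Deepgram sends can vary by API version, so treat
# this as an illustration, not a contract):
#
#   {
#     "model": "proxy",
#     "messages": [
#       {"role": "system", "content": "<the prompt configured above>"},
#       {"role": "user", "content": "<latest transcribed user turn>"}
#     ],
#     "stream": true
#   }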

def run_agent(proxy_url: str = PROXY_URL) -> None:
    """Connect to the Deepgram Voice Agent API and handle agent events.

    This is a demonstration entry point: it opens the connection and logs
    events but sends no audio. In production you'd pipe audio from a phone
    call, browser WebSocket, microphone, or other source (see the
    stream_audio sketch near the end of this module).
    """
    if not os.environ.get("DEEPGRAM_API_KEY"):
        print("Error: DEEPGRAM_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    client = DeepgramClient()
    settings = build_agent_settings(proxy_url)

    print(f"[agent] Connecting to Deepgram Voice Agent (proxy at {proxy_url})...")

    with client.agent.v1.connect() as connection:
        connection.send_settings(settings)

        stop_event = threading.Event()

        def on_recv():
            while not stop_event.is_set():
                try:
                    msg = connection.recv()
                except Exception:
                    break

                if isinstance(msg, AgentV1Welcome):
                    print(f"[agent] Connected — request_id: {msg.request_id}")
                elif isinstance(msg, AgentV1SettingsApplied):
                    print("[agent] Settings applied — proxy endpoint active")
                elif isinstance(msg, AgentV1ConversationText):
                    print(f"[{msg.role}] {msg.content}")
                elif isinstance(msg, AgentV1AgentThinking):
                    print("[agent] Thinking...")
                elif isinstance(msg, AgentV1AgentAudioDone):
                    print("[agent] Audio done")
                elif isinstance(msg, AgentV1Error):
                    print(f"[agent] Error: {msg.description}")
                elif isinstance(msg, bytes):
                    pass  # raw TTS audio from the agent; a real app would play or forward it
                elif isinstance(msg, AgentV1FunctionCallRequest):
                    for fn in msg.functions or []:
                        connection.send_function_call_response(
                            AgentV1SendFunctionCallResponse(
                                type="FunctionCallResponse",
                                id=fn.id,
                                output='{"error": "no functions registered"}',
                            )
                        )

            # Receive loop ended (connection closed or errored): unblock the main thread.
            stop_event.set()

        recv_thread = threading.Thread(target=on_recv, daemon=True)
        recv_thread.start()

        print("[agent] Agent is running. Press Ctrl+C to stop.")
        print("[agent] (No microphone input in this demo — connect a real audio source)")

        def handle_signal(sig, frame):
            stop_event.set()

        signal.signal(signal.SIGINT, handle_signal)

        stop_event.wait()
        print("\n[agent] Shutting down...")


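# A minimal sketch of wiring in a real audio source, for illustration only.
# ASSUMPTION: the method name for sending raw audio varies across Deepgram SDK
# versions (e.g. send_media or a plain send), so check the SDK you have
# installed. `audio_chunks` is any iterable of 16 kHz, 16-bit linear PCM bytes,
# matching the audio.input settings above. Nothing in this demo calls this.
def stream_audio(connection, audio_chunks) -> None:
    """Forward raw PCM chunks from any source (phone bridge, browser, mic)."""
    for chunk in audio_chunks:
        connection.send_media(chunk)  # assumed send method; see note above

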
if __name__ == "__main__":
    run_agent()