
Commit ba37b26

author: examples-bot

feat(examples): add 530 — Multi-Provider Chat Completions Proxy for Voice Agent (Python)

1 parent fe9e6da

8 files changed: 665 additions & 0 deletions

Lines changed: 5 additions & 0 deletions

# Deepgram — https://console.deepgram.com/
DEEPGRAM_API_KEY=

# OpenAI — https://platform.openai.com/api-keys
OPENAI_API_KEY=
Lines changed: 65 additions & 0 deletions

# Multi-Provider Chat Completions Proxy for Deepgram Voice Agent

A FastAPI proxy server that exposes an OpenAI-compatible `/v1/chat/completions` endpoint, routing requests to multiple LLM backends (OpenAI, AWS Bedrock). The Deepgram Voice Agent API uses this proxy as its `think.endpoint.url`, letting you swap LLM providers without changing application code.

## What you'll build

A Python proxy server that sits between the Deepgram Voice Agent API and your choice of LLM backend. The Voice Agent handles speech-to-text (nova-3) and text-to-speech (aura-2) while all "thinking" routes through your proxy to OpenAI or AWS Bedrock — switchable via a single environment variable.
## Prerequisites

- Python 3.10+
- Deepgram account — [get a free API key](https://console.deepgram.com/)
- OpenAI account — [get an API key](https://platform.openai.com/api-keys)
- AWS account (optional, for Bedrock) — [IAM console](https://console.aws.amazon.com/iam/)
## Environment variables

| Variable | Where to find it |
|----------|-----------------|
| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) |
| `LLM_PROVIDER` | Set to `openai` or `bedrock` (default: `openai`) |
| `OPENAI_API_KEY` | [OpenAI dashboard → API keys](https://platform.openai.com/api-keys) |
| `AWS_ACCESS_KEY_ID` | [AWS IAM console](https://console.aws.amazon.com/iam/) (Bedrock only) |
| `AWS_SECRET_ACCESS_KEY` | [AWS IAM console](https://console.aws.amazon.com/iam/) (Bedrock only) |
| `AWS_REGION` | AWS region with Bedrock access, e.g. `us-east-1` (Bedrock only) |
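A startup check along these lines catches missing keys early. This helper is illustrative only (not part of the commit); the variable names match the table above:

```python
import os

# Which variables each backend needs, per the table above.
REQUIRED = {
    "openai": ["DEEPGRAM_API_KEY", "OPENAI_API_KEY"],
    "bedrock": ["DEEPGRAM_API_KEY", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION"],
}


def missing_vars(provider: str, env=None) -> list[str]:
    """Return the required variables that are unset or empty for a provider."""
    env = os.environ if env is None else env
    return [name for name in REQUIRED.get(provider, []) if not env.get(name)]
```

Calling `missing_vars(os.environ.get("LLM_PROVIDER", "openai"))` at startup and failing fast on a non-empty result avoids confusing mid-call errors later.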
## Install and run

```bash
cp .env.example .env
# Fill in your API keys in .env

pip install -r requirements.txt

# Start the proxy server
cd src && uvicorn proxy:app --port 8080

# In another terminal, run the demo Voice Agent
python src/demo_agent.py
```
## Key parameters

| Parameter | Value | Description |
|-----------|-------|-------------|
| `think.provider.type` | `open_ai` | Tells the Voice Agent to use OpenAI-compatible format |
| `think.endpoint.url` | `https://your-proxy.example.com/v1/chat/completions` | Points the agent's LLM calls at the proxy (must be HTTPS) |
| `listen.provider.model` | `nova-3` | Deepgram's flagship STT model |
| `speak.provider.model` | `aura-2-thalia-en` | Deepgram's TTS model |
| `LLM_PROVIDER` | `openai` or `bedrock` | Which backend the proxy routes to |
## How it works

1. **Start the proxy** — FastAPI serves `/v1/chat/completions` on port 8080
2. **Connect the Voice Agent** — The demo script opens a WebSocket to `wss://agent.deepgram.com/v1/agent/converse` with `think.endpoint.url` pointed at the proxy
3. **User speaks** — The Voice Agent transcribes speech using Deepgram nova-3
4. **Agent thinks** — The Voice Agent sends an OpenAI-format chat completion request to the proxy
5. **Proxy routes** — Based on `LLM_PROVIDER` (or the `X-LLM-Provider` header), the proxy forwards to OpenAI or AWS Bedrock
6. **Agent speaks** — The Voice Agent converts the LLM response to speech using Deepgram aura-2 and streams audio back

To switch providers, change `LLM_PROVIDER` in your `.env` — no code changes needed. You can also override per-request using the `X-LLM-Provider: bedrock` header.
## Starter templates

[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories)
Lines changed: 8 additions & 0 deletions

deepgram-sdk==6.1.1
fastapi==0.135.3
starlette==1.0.0
uvicorn[standard]==0.34.0
httpx==0.28.1
python-dotenv==1.2.2
websockets==14.2
boto3==1.37.23

examples/530-voice-agent-multi-provider-proxy-python/src/__init__.py

Whitespace-only changes.
Lines changed: 136 additions & 0 deletions
"""Demo: connect a Deepgram Voice Agent to the multi-provider proxy.

This script opens a WebSocket to the Deepgram Voice Agent API with
think.endpoint.url pointed at the local proxy server, then prints the
agent events it receives. It does not capture microphone audio itself;
send linear16 audio frames over the socket to interact.

Prerequisites:
1. Start the proxy: uvicorn src.proxy:app --port 8080
2. Run this script: python src/demo_agent.py

The Voice Agent handles STT (nova-3) and TTS (aura-2) directly via
Deepgram, while all LLM "thinking" goes through the proxy — which
routes to whichever provider LLM_PROVIDER is set to.
"""
from __future__ import annotations

import json
import os
import sys

from dotenv import load_dotenv

load_dotenv()

import websockets.sync.client

DG_AGENT_URL = "wss://agent.deepgram.com/v1/agent/converse"

PROXY_URL = os.environ.get("PROXY_URL", "http://localhost:8080/v1/chat/completions")

def build_settings(proxy_url: str = PROXY_URL) -> dict:
    """Build the Voice Agent Settings message with the proxy as the LLM backend."""
    return {
        "type": "Settings",
        "audio": {
            "input": {
                "encoding": "linear16",
                "sample_rate": 16000,
            },
            "output": {
                "encoding": "linear16",
                "sample_rate": 16000,
            },
        },
        "agent": {
            "listen": {
                "provider": {
                    "type": "deepgram",
                    "model": "nova-3",
                },
            },
            "think": {
                "provider": {
                    "type": "open_ai",
                    "model": "gpt-4o-mini",
                },
                "endpoint": {
                    "url": proxy_url,
                    "headers": {},
                },
                "prompt": (
                    "You are a helpful voice assistant. Keep responses concise "
                    "and conversational — the user is speaking, not reading."
                ),
            },
            "speak": {
                "provider": {
                    "type": "deepgram",
                    "model": "aura-2-thalia-en",
                },
            },
            "greeting": "Hello! I'm your voice assistant. How can I help?",
        },
    }

def run_agent(proxy_url: str = PROXY_URL) -> None:
    """Connect to the Voice Agent and print events until interrupted."""
    api_key = os.environ.get("DEEPGRAM_API_KEY")
    if not api_key:
        print("Error: DEEPGRAM_API_KEY not set", file=sys.stderr)
        sys.exit(1)

    settings = build_settings(proxy_url)

    print("Connecting to Deepgram Voice Agent…")
    print(f"  LLM proxy: {proxy_url}")

    ws = websockets.sync.client.connect(
        DG_AGENT_URL,
        additional_headers={"Authorization": f"Token {api_key}"},
    )

    ws.send(json.dumps(settings))
    print("Settings sent, waiting for agent…")

    try:
        while True:
            raw = ws.recv()
            if isinstance(raw, bytes):
                print(f"  [audio] {len(raw)} bytes")
                continue

            msg = json.loads(raw)
            msg_type = msg.get("type", "")

            if msg_type == "Welcome":
                print(f"  Connected — request_id: {msg.get('request_id')}")
            elif msg_type == "SettingsApplied":
                print("  Settings applied — agent ready")
                print("  (Send audio to interact, or Ctrl+C to stop)")
            elif msg_type == "ConversationText":
                print(f"  [{msg.get('role')}] {msg.get('content')}")
            elif msg_type == "AgentStartedSpeaking":
                latency = msg.get("total_latency", 0)
                print(f"  Agent speaking (latency: {latency:.2f}s)")
            elif msg_type == "AgentAudioDone":
                print("  Agent audio done")
            elif msg_type == "Error":
                print(f"  ERROR: {msg.get('description')} ({msg.get('code')})")
            elif msg_type == "Warning":
                print(f"  WARNING: {msg.get('description')}")
            else:
                print(f"  [{msg_type}] {json.dumps(msg)[:120]}")

    except KeyboardInterrupt:
        print("\nDisconnecting…")
    finally:
        ws.close()


if __name__ == "__main__":
    run_agent()
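The demo above only prints events; to hear the agent respond you must stream linear16, 16 kHz audio frames over the socket yourself. A minimal pacing helper for that, assuming a raw PCM capture in a hypothetical `audio.raw` file and the connected `ws` from `run_agent` (illustrative, not part of the commit):

```python
from typing import Iterator


def pcm_chunks(data: bytes, chunk_size: int = 3200) -> Iterator[bytes]:
    """Yield ~100 ms frames of 16 kHz, 16-bit mono PCM.

    16000 samples/s * 2 bytes * 0.1 s = 3200 bytes per frame.
    """
    for i in range(0, len(data), chunk_size):
        yield data[i : i + chunk_size]


# Usage against a connected Voice Agent websocket (hypothetical file name):
#
# import time
# with open("audio.raw", "rb") as f:
#     for frame in pcm_chunks(f.read()):
#         ws.send(frame)   # binary frames are treated as input audio
#         time.sleep(0.1)  # pace roughly at real time
```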
Lines changed: 121 additions & 0 deletions
"""LLM provider backends for the OpenAI-compatible proxy.

Each provider implements a completion function that accepts OpenAI-format
messages and returns an OpenAI-format response dict. This keeps the proxy
layer thin — adding a new provider means writing one function.
"""

from __future__ import annotations

import os
import time
import uuid
from typing import Any

import httpx

def openai_completion(
    messages: list[dict[str, Any]],
    model: str = "gpt-4o-mini",
    **kwargs: Any,
) -> dict[str, Any]:
    """Forward the request to OpenAI's chat completions API."""
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY not set")

    payload: dict[str, Any] = {"model": model, "messages": messages, **kwargs}

    resp = httpx.post(
        "https://api.openai.com/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        json=payload,
        timeout=60.0,
    )
    resp.raise_for_status()
    return resp.json()

def bedrock_completion(
    messages: list[dict[str, Any]],
    model: str = "anthropic.claude-3-haiku-20240307-v1:0",
    **kwargs: Any,
) -> dict[str, Any]:
    """Forward the request to AWS Bedrock's Converse API and reformat as OpenAI."""
    try:
        import boto3
    except ImportError as exc:
        raise RuntimeError("boto3 is required for the bedrock provider") from exc

    region = os.environ.get("AWS_REGION", "us-east-1")
    client = boto3.client(
        "bedrock-runtime",
        region_name=region,
        aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
    )

    bedrock_messages = []
    system_prompt = None
    for msg in messages:
        if msg["role"] == "system":
            system_prompt = msg["content"]
            continue
        bedrock_messages.append({
            "role": msg["role"],
            "content": [{"text": msg["content"]}],
        })

    converse_kwargs: dict[str, Any] = {
        "modelId": model,
        "messages": bedrock_messages,
    }
    if system_prompt:
        converse_kwargs["system"] = [{"text": system_prompt}]

    response = client.converse(**converse_kwargs)

    output_text = ""
    if response.get("output", {}).get("message", {}).get("content"):
        for block in response["output"]["message"]["content"]:
            if "text" in block:
                output_text += block["text"]

    usage = response.get("usage", {})
    # Map Bedrock stop reasons onto OpenAI's finish_reason vocabulary.
    finish_reason = {"end_turn": "stop", "max_tokens": "length"}.get(
        response.get("stopReason", "end_turn"), "stop"
    )
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": output_text},
                "finish_reason": finish_reason,
            }
        ],
        "usage": {
            "prompt_tokens": usage.get("inputTokens", 0),
            "completion_tokens": usage.get("outputTokens", 0),
            "total_tokens": usage.get("inputTokens", 0) + usage.get("outputTokens", 0),
        },
    }

PROVIDERS = {
    "openai": openai_completion,
    "bedrock": bedrock_completion,
}


def get_provider(name: str):
    """Return the completion function for the named provider."""
    fn = PROVIDERS.get(name)
    if fn is None:
        raise ValueError(f"Unknown provider '{name}'. Available: {list(PROVIDERS.keys())}")
    return fn
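Per the module docstring, adding a backend is one function plus a registry entry. A toy echo provider, handy for testing the proxy offline (hypothetical, not part of the commit):

```python
import time
import uuid
from typing import Any


def echo_completion(
    messages: list[dict[str, Any]],
    model: str = "echo",
    **kwargs: Any,
) -> dict[str, Any]:
    """Toy provider: return the last user message back in OpenAI format."""
    last_user = next(
        (m["content"] for m in reversed(messages) if m["role"] == "user"), ""
    )
    return {
        "id": f"chatcmpl-{uuid.uuid4().hex[:12]}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": last_user},
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }


# Registering it would be one line:
# PROVIDERS["echo"] = echo_completion
```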
