Build real-time Claude API streaming over WebSocket in Python (2026). Uses FastAPI WebSockets + AsyncAnthropic to push tokens to the browser as they arrive.
The Anthropic Python SDK's streaming API returns tokens as they are generated, making it ideal for real-time chat UIs. Pairing it with FastAPI WebSockets lets you push each token to the browser the instant it arrives — no polling needed.
pip install anthropic fastapi uvicorn websockets
import asyncio
import anthropic
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
import json
# FastAPI application instance; the @app.websocket(...) routes in this file register on it.
app = FastAPI()
# Single async Anthropic client created at import time and shared by the WebSocket handlers.
client = anthropic.AsyncAnthropic() # reads ANTHROPIC_API_KEY from env
@app.websocket("/ws/chat")
async def chat_ws(websocket: WebSocket) -> None:
    """Stream single-turn Claude replies to the browser over a WebSocket.

    Every frame is JSON text.

    Client -> server:
        {"message": "<prompt>"}

    Server -> client:
        {"type": "delta", "text": "..."}   one per streamed text chunk
        {"type": "done", "input_tokens": n, "output_tokens": m}
        {"type": "error", "message": "..."} when the request is unusable or
                                            the upstream call fails

    Each prompt is answered on its own; no conversation history is kept.
    The loop runs until the client disconnects.
    """
    await websocket.accept()

    async def send(frame: dict) -> None:
        # All outgoing frames share one encoding path.
        await websocket.send_text(json.dumps(frame))

    try:
        while True:
            # Receive message from browser
            data = await websocket.receive_text()

            # Browser input is untrusted: bad JSON or a non-object payload must
            # produce an error frame rather than crash the connection.
            try:
                payload = json.loads(data)
            except json.JSONDecodeError:
                payload = None
            user_message = payload.get("message", "") if isinstance(payload, dict) else ""
            if not user_message:
                await send({
                    "type": "error",
                    "message": 'Expected a JSON object with a non-empty "message" field',
                })
                continue

            # Stream Claude response chunk by chunk
            try:
                async with client.messages.stream(
                    model="claude-sonnet-4-6",
                    max_tokens=1024,
                    messages=[{"role": "user", "content": user_message}],
                ) as stream:
                    async for text in stream.text_stream:
                        await send({"type": "delta", "text": text})
                    # Read usage while the stream context is still open.
                    final = await stream.get_final_message()
            except anthropic.APIError:
                # Upstream failure (rate limit, invalid request, network, ...):
                # report it and keep the socket open for the next prompt.
                # Details are deliberately not forwarded to the browser.
                await send({"type": "error", "message": "Upstream model request failed"})
                continue

            # Signal completion
            await send({
                "type": "done",
                "input_tokens": final.usage.input_tokens,
                "output_tokens": final.usage.output_tokens,
            })
    except WebSocketDisconnect:
        # Client went away. The exception unwinds through the `async with`
        # above, which is where the SDK is expected to release the HTTP stream.
        pass
ANTHROPIC_API_KEY=sk-ant-... uvicorn app:app --host 0.0.0.0 --port 8000
// Open the WebSocket to the single-turn chat endpoint on the local server (port 8000).
const ws = new WebSocket("ws://localhost:8000/ws/chat");
// Element whose textContent receives the streamed reply.
const output = document.getElementById("output");
// Dispatch each server frame on its "type" field; unknown types are ignored.
ws.onmessage = (event) => {
  const msg = JSON.parse(event.data);
  switch (msg.type) {
    case "delta":
      // Append the chunk to the visible reply as soon as it arrives.
      output.textContent += msg.text;
      break;
    case "done":
      // Final frame of a reply: report the token usage it carries.
      console.log(`Used ${msg.input_tokens} input / ${msg.output_tokens} output tokens`);
      break;
    default:
      break;
  }
};
/**
 * Send a prompt to the server and clear the output area for the new reply.
 *
 * If the socket is still connecting, the send is deferred until it opens.
 * If the socket is closing or closed, nothing is sent and the previous
 * reply is left on screen.
 *
 * @param {string} text - The prompt to send as the "message" field.
 */
function sendMessage(text) {
  const frame = JSON.stringify({ message: text });
  const dispatch = () => {
    output.textContent = ""; // clear previous response
    ws.send(frame);
  };

  switch (ws.readyState) {
    case WebSocket.OPEN:
      dispatch();
      break;
    case WebSocket.CONNECTING:
      // send() throws InvalidStateError during the handshake, so wait for "open".
      ws.addEventListener("open", dispatch, { once: true });
      break;
    default:
      // CLOSING / CLOSED: send() would silently discard the data, so keep
      // the old output and surface the problem instead.
      console.warn("WebSocket is not open; message was not sent");
  }
}
// Example: sendMessage("Explain streaming in one paragraph")
@app.websocket("/ws/chat/multi")
async def multi_turn_ws(websocket: WebSocket) -> None:
    """Multi-turn chat over a WebSocket, with history kept per connection.

    Every frame is JSON text.

    Client -> server:
        {"message": "<prompt>"}

    Server -> client:
        {"type": "delta", "text": "..."}   one per streamed text chunk
        {"type": "done"}                    reply finished
        {"type": "error", "message": "..."} when the request is unusable or
                                            the upstream call fails

    The conversation lives in a local list and is lost when the connection
    closes. A turn is only recorded once it has completed successfully, so a
    failed request does not affect later ones.
    """
    await websocket.accept()
    history = []  # persisted per-connection

    async def send(frame: dict) -> None:
        # All outgoing frames share one encoding path.
        await websocket.send_text(json.dumps(frame))

    try:
        while True:
            data = await websocket.receive_text()

            # Browser input is untrusted: bad JSON or a non-object payload must
            # produce an error frame rather than crash the connection.
            try:
                payload = json.loads(data)
            except json.JSONDecodeError:
                payload = None
            user_message = payload.get("message", "") if isinstance(payload, dict) else ""
            if not user_message:
                await send({
                    "type": "error",
                    "message": 'Expected a JSON object with a non-empty "message" field',
                })
                continue

            # Append user turn
            history.append({"role": "user", "content": user_message})

            # Collect full assistant reply while streaming
            chunks = []
            try:
                async with client.messages.stream(
                    model="claude-sonnet-4-6",
                    max_tokens=1024,
                    messages=history,
                ) as stream:
                    async for text in stream.text_stream:
                        chunks.append(text)
                        await send({"type": "delta", "text": text})
            except anthropic.APIError:
                # Roll back the unanswered user turn so the next request does
                # not carry two user messages in a row.
                history.pop()
                await send({"type": "error", "message": "Upstream model request failed"})
                continue

            assistant_text = "".join(chunks)
            if assistant_text:
                # Append assistant turn to history for next round
                history.append({"role": "assistant", "content": assistant_text})
            else:
                # An empty assistant turn in the middle of the history is
                # presumably rejected by the API on the next call (TODO
                # confirm), so drop the whole exchange instead of recording it.
                history.pop()
            await send({"type": "done"})
    except WebSocketDisconnect:
        pass
@app.websocket("/ws/chat/cancellable")
async def cancellable_ws(websocket: WebSocket) -> None:
    """Single-turn chat over a WebSocket with client-initiated cancellation.

    Every frame is JSON text.

    Client -> server:
        {"message": "<prompt>"}   start a reply
        {"type": "cancel"}        stop the reply currently streaming

    Server -> client:
        {"type": "delta", "text": "..."}       one per streamed text chunk
        {"type": "done", "cancelled": bool}     reply finished or was stopped
        {"type": "error", "message": "..."}     unusable request or upstream
                                                failure

    One background task is the only reader of the socket. It turns cancel
    frames into an event and puts everything else on a queue, so prompts and
    cancels can never be consumed by the wrong coroutine. Prompts that arrive
    while a reply is streaming are queued and answered in order.

    A cancel only affects a reply that is already streaming: the flag is
    reset when the next prompt is picked up.
    """
    await websocket.accept()

    cancel_requested = asyncio.Event()  # set by a cancel frame or by disconnect
    client_gone = asyncio.Event()       # set only when the client disconnected
    prompts: asyncio.Queue = asyncio.Queue()  # parsed payloads; None = stop

    async def send(frame: dict) -> None:
        # All outgoing frames share one encoding path.
        await websocket.send_text(json.dumps(frame))

    async def read_socket() -> None:
        """Route incoming frames: cancels set the event, the rest are queued."""
        try:
            while True:
                raw = await websocket.receive_text()
                try:
                    msg = json.loads(raw)
                except json.JSONDecodeError:
                    msg = None
                if not isinstance(msg, dict):
                    # Queue an empty payload so the main loop answers with an
                    # error frame; the reader itself never writes to the socket.
                    msg = {}
                if msg.get("type") == "cancel":
                    cancel_requested.set()
                else:
                    prompts.put_nowait(msg)
        except WebSocketDisconnect:
            client_gone.set()
            cancel_requested.set()  # also stops a stream that is in flight
        finally:
            # Always wake the main loop, whatever ended the reader.
            prompts.put_nowait(None)

    # Keep a reference: the event loop holds tasks only weakly, and the task
    # has to be cancelled when the handler exits.
    reader = asyncio.create_task(read_socket())
    try:
        while True:
            payload = await prompts.get()
            if payload is None:
                break

            message = payload.get("message", "")
            if not message:
                await send({
                    "type": "error",
                    "message": 'Expected a JSON object with a non-empty "message" field',
                })
                continue

            # Discard any cancel that was meant for an earlier reply.
            cancel_requested.clear()
            cancelled = False
            try:
                async with client.messages.stream(
                    model="claude-sonnet-4-6",
                    max_tokens=1024,
                    messages=[{"role": "user", "content": message}],
                ) as stream:
                    async for text in stream.text_stream:
                        if cancel_requested.is_set():
                            # Breaking out leaves the `async with`, which ends
                            # the upstream request early.
                            cancelled = True
                            break
                        await send({"type": "delta", "text": text})
            except anthropic.APIError:
                await send({"type": "error", "message": "Upstream model request failed"})
                continue

            if client_gone.is_set():
                # Nobody is left to receive the "done" frame.
                break
            await send({"type": "done", "cancelled": cancelled})
    except WebSocketDisconnect:
        pass
    finally:
        reader.cancel()
| Feature | Server-Sent Events (SSE) | WebSocket |
|---|---|---|
| Direction | Server → client only | Bidirectional |
| Cancel mid-stream | Client closes connection | Send cancel message |
| Browser support | All modern browsers | All modern browsers |
| Proxy / CDN | Works with most CDNs | Requires WS-aware proxy |
| Best for | Static display, summaries | Interactive chat, multi-turn |
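For per-request cost calculations using the token counts returned in the done event, use the Claude API Cost Calculator. Note that only the basic /ws/chat endpoint includes input_tokens and output_tokens in its done event; the multi-turn and cancellable examples do not send token counts, so read them from the stream's final message there if you need them. For the pure HTTP streaming pattern without WebSockets, see the streaming guide.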