Build a streaming Claude chatbot backend with FastAPI and the Anthropic SDK. Includes SSE streaming, async endpoints, CORS, and a minimal frontend.
FastAPI's async-first design pairs naturally with Claude's streaming API. This guide builds a production-ready streaming chatbot backend in under 50 lines.
pip install fastapi uvicorn anthropic
# main.py
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import anthropic
# Shared async Anthropic client used by the streaming generators in this file.
client = anthropic.AsyncAnthropic()  # reads ANTHROPIC_API_KEY from env

# The ASGI application; route handlers are registered on it below.
app = FastAPI()

# Allow the browser frontend (served from a different origin) to call the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],  # restrict in production
    allow_methods=["POST"],
    allow_headers=["*"],
)
class ChatRequest(BaseModel):
    """Request body for the single-turn ``/chat/stream`` endpoint."""

    # The user's message; forwarded to Claude as the only conversation turn.
    message: str
    # System prompt; used as-is when the caller does not supply one.
    system: str = "You are a helpful assistant."
async def stream_claude(message: str, system: str):
    """Stream Claude's reply to a single user message as Server-Sent Events.

    Args:
        message: The user's message, sent as the only conversation turn.
        system: System prompt passed to the Messages API.

    Yields:
        One SSE event per text delta (one or more ``data:`` lines terminated
        by a blank line), followed by a final ``data: [DONE]`` event.
    """
    async with client.messages.stream(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        system=system,
        messages=[{"role": "user", "content": message}],
    ) as stream:
        async for text in stream.text_stream:
            # A raw newline inside a delta would terminate the SSE event early
            # and drop the remainder of the delta. Emit one "data:" line per
            # line of text instead; SSE clients rejoin them with a newline.
            # Deltas without newlines produce exactly the same bytes as before.
            lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
            yield "".join(f"data: {line}\n" for line in lines) + "\n"
    yield "data: [DONE]\n\n"
@app.post("/chat/stream")
async def chat_stream(req: ChatRequest):
    """Handle POST /chat/stream by streaming Claude's reply as SSE.

    The validated request's message and system prompt are handed to
    ``stream_claude``; its output is sent with the ``text/event-stream``
    media type.
    """
    event_source = stream_claude(req.message, req.system)
    return StreamingResponse(event_source, media_type="text/event-stream")
# Start the development server for the `app` object in main.py
# (--reload restarts it when the source changes).
uvicorn main:app --reload
# POST to http://localhost:8000/chat/stream
# -N turns off curl's output buffering so streamed chunks print as they arrive.
curl -N -X POST http://localhost:8000/chat/stream \
-H "Content-Type: application/json" \
-d '{"message": "Explain FastAPI in 3 bullet points."}'
from typing import List, Literal
class Message(BaseModel):
    """A single prior turn in the conversation history."""

    # Only these two roles are accepted. Validating here makes a bad role fail
    # with a 422 before streaming starts, rather than as an upstream API error
    # after the 200 response has already begun.
    role: Literal["user", "assistant"]
    # The text of the turn.
    content: str
class ChatHistoryRequest(BaseModel):
    """Request body for the multi-turn ``/chat/history/stream`` endpoint."""

    # Conversation turns, forwarded to Claude in the order given.
    messages: List[Message]
    # System prompt; used as-is when the caller does not supply one.
    system: str = "You are a helpful assistant."
async def stream_with_history(messages: list, system: str):
    """Stream Claude's reply to a multi-turn conversation as Server-Sent Events.

    Args:
        messages: Conversation turns; each item must expose ``role`` and
            ``content`` attributes.
        system: System prompt passed to the Messages API.

    Yields:
        One SSE event per text delta (one or more ``data:`` lines terminated
        by a blank line), followed by a final ``data: [DONE]`` event.
    """
    history = [{"role": m.role, "content": m.content} for m in messages]
    async with client.messages.stream(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        system=system,
        messages=history,
    ) as stream:
        async for text in stream.text_stream:
            # A raw newline inside a delta would terminate the SSE event early
            # and drop the remainder of the delta. Emit one "data:" line per
            # line of text instead; SSE clients rejoin them with a newline.
            # Deltas without newlines produce exactly the same bytes as before.
            lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
            yield "".join(f"data: {line}\n" for line in lines) + "\n"
    yield "data: [DONE]\n\n"
@app.post("/chat/history/stream")
async def chat_history_stream(req: ChatHistoryRequest):
    """Handle POST /chat/history/stream by streaming Claude's reply as SSE.

    The validated request's message list and system prompt are handed to
    ``stream_with_history``; its output is sent with the
    ``text/event-stream`` media type.
    """
    event_source = stream_with_history(req.messages, req.system)
    return StreamingResponse(event_source, media_type="text/event-stream")
<!-- index.html -->
<textarea id="msg" rows="3" style="width:100%"></textarea>
<button onclick="send()">Send</button>
<pre id="out"></pre>
<script>
/**
 * Send the textarea contents to the streaming endpoint and render the reply
 * incrementally into the #out element.
 *
 * The response body is parsed as Server-Sent Events: "data:" lines are
 * collected until a blank line ends the event, then joined with "\n" and
 * appended to the output. A "[DONE]" event ends the stream.
 */
async function send() {
  const out = document.getElementById('out');
  out.textContent = '';
  const res = await fetch('http://localhost:8000/chat/stream', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({message: document.getElementById('msg').value})
  });
  // Surface HTTP errors (e.g. a 422 validation failure) instead of silently
  // showing nothing.
  if (!res.ok || !res.body) {
    out.textContent = `Request failed: ${res.status} ${res.statusText}`;
    return;
  }
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';     // trailing partial line carried over between reads
  let dataLines = [];  // "data:" payloads of the event being assembled
  while (true) {
    const {done, value} = await reader.read();
    if (done) break;
    // stream: true keeps multi-byte UTF-8 characters intact when they are
    // split across network chunks.
    buffer += decoder.decode(value, {stream: true});
    const lines = buffer.split('\n');
    // The last piece has no terminating newline yet; keep it for next read.
    buffer = lines.pop();
    for (const line of lines) {
      if (line.startsWith('data:')) {
        // Per SSE, strip the field name and at most one leading space.
        const payload = line.slice(5);
        dataLines.push(payload.startsWith(' ') ? payload.slice(1) : payload);
      } else if (line === '' && dataLines.length > 0) {
        // Blank line: the event is complete.
        const data = dataLines.join('\n');
        dataLines = [];
        if (data === '[DONE]') {
          await reader.cancel();
          return;
        }
        out.textContent += data;
      }
    }
  }
}
</script>
| Criterion | FastAPI | Flask |
|---|---|---|
| Async/await support | Native (ASGI) | Requires flask[async] / gevent |
| Streaming response | StreamingResponse built-in | Generator + stream_with_context |
| Request validation | Pydantic (automatic) | Manual or marshmallow |
| API docs | Auto-generated /docs | Requires Flask-Restx/Flasgger |
| Concurrent requests | Excellent (uvicorn + asyncio) | Limited (threaded WSGI by default) |
FastAPI is the recommended choice for Claude chatbot backends in 2026. Estimate token costs for your expected traffic at the Claude API Cost Calculator. For more API patterns, see the streaming Python guide and async Python patterns.