Build a Claude chatbot backend with Flask in Python. Includes SSE streaming with stream_with_context, a conversation-history endpoint, and CORS setup.
Flask is the most widely deployed Python web framework. This guide shows how to build a Claude chatbot backend with Flask — from minimal synchronous calls to full server-sent event (SSE) streaming.
pip install flask anthropic flask-cors
from flask import Flask, request, jsonify
import anthropic
# The SDK client reads its API key from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic()
# Application object that the route decorators below register against.
app = Flask(__name__)
@app.route("/chat", methods=["POST"])
def chat():
    """Send one user message to Claude and return the reply as JSON.

    Expects a JSON body of the form ``{"message": "<text>"}``.

    Returns:
        ``{"reply": "<text>"}`` on success. ``{"error": "..."}`` with status
        400 when the body is not a JSON object carrying a non-empty string
        ``message``, or with status 502 when the Claude API call fails.
    """
    # silent=True returns None instead of raising when the body is missing
    # or is not valid JSON, so every bad request gets the same 400 below.
    payload = request.get_json(silent=True)
    user_message = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({"error": "'message' must be a non-empty string"}), 400

    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=[{"role": "user", "content": user_message}],
        )
    except anthropic.APIError:
        # Log the details server-side; do not leak upstream errors to clients.
        app.logger.exception("Claude API request failed")
        return jsonify({"error": "Upstream Claude API request failed"}), 502

    # Join every text block rather than indexing content[0], which raises
    # IndexError when the response carries no content blocks.
    reply = "".join(
        block.text for block in response.content if block.type == "text"
    )
    return jsonify({"reply": reply})
if __name__ == "__main__":
    import os

    # Debug mode turns on the interactive debugger, which can execute
    # arbitrary code, so it is opt-in: run with FLASK_DEBUG=1 during local
    # development and leave it unset everywhere else.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
Test it:
curl -X POST http://localhost:5000/chat \
-H "Content-Type: application/json" \
-d '{"message": "What is prompt caching?"}'
from flask import Flask, request, Response, stream_with_context
import anthropic, json
# Shared Anthropic SDK client used by the streaming route.
client = anthropic.Anthropic()
# Application object that the streaming route is registered on.
app = Flask(__name__)
@app.route("/chat/stream", methods=["POST"])
def chat_stream():
    """Stream Claude's reply to one user message as server-sent events.

    Expects a JSON body of the form ``{"message": "<text>"}``.

    Returns:
        A ``text/event-stream`` response. Each event is
        ``data: {"text": "<chunk>"}`` and the stream always ends with
        ``data: [DONE]``. If the Claude API fails, one event carrying an
        ``error`` field is sent before the sentinel. A JSON ``{"error": ...}``
        response with status 400 is returned when the body is not a JSON
        object carrying a non-empty string ``message``.
    """
    # Validate before streaming starts: once the first byte is sent the
    # status code can no longer be changed.
    payload = request.get_json(silent=True)
    user_message = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(user_message, str) or not user_message.strip():
        return Response(
            json.dumps({"error": "'message' must be a non-empty string"}),
            status=400,
            mimetype="application/json",
        )

    def generate():
        """Yield one SSE event per text chunk, then the [DONE] sentinel."""
        try:
            with client.messages.stream(
                model="claude-sonnet-4-6",
                max_tokens=1024,
                messages=[{"role": "user", "content": user_message}],
            ) as stream:
                for text in stream.text_stream:
                    yield f"data: {json.dumps({'text': text})}\n\n"
        except anthropic.APIError:
            app.logger.exception("Claude API stream failed")
            # Keep an empty "text" key so clients that only read `text`
            # keep working; newer clients can inspect `error`.
            error_event = json.dumps(
                {"text": "", "error": "Upstream Claude API request failed"}
            )
            yield f"data: {error_event}\n\n"
        # Always send the sentinel so the client can tell the stream ended.
        yield "data: [DONE]\n\n"

    return Response(
        stream_with_context(generate()),
        mimetype="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # disable nginx buffering
        },
    )
// EventSource can only issue GET requests, so it cannot call the POST-only
// /chat/stream route. Read the event stream with fetch instead.
const resp = await fetch('/chat/stream', {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: JSON.stringify({message: 'Hello Claude'})
});
if (!resp.ok) {
  throw new Error(`Chat request failed with status ${resp.status}`);
}

const reader = resp.body.getReader();
const decoder = new TextDecoder();
const output = document.getElementById('output');
// Holds the trailing partial line until the rest arrives in a later chunk.
let buffer = '';

while (true) {
  const {done, value} = await reader.read();
  if (done) break;
  // stream: true keeps multi-byte characters intact when a network chunk
  // ends in the middle of one.
  buffer += decoder.decode(value, {stream: true});
  const lines = buffer.split('\n');
  // The last element is either '' or an incomplete line; keep it for later.
  buffer = lines.pop();
  for (const line of lines) {
    if (line.startsWith('data: ') && line !== 'data: [DONE]') {
      const {text} = JSON.parse(line.slice(6));
      if (text) {
        output.textContent += text;
      }
    }
  }
}
import os

import anthropic
from flask import Flask, request, jsonify, session
app = Flask(__name__)
# Flask signs the session cookie with this key. Read it from the environment
# so a real secret never lives in source; the placeholder fallback only keeps
# local demos running and must not be used in production.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "change-me-in-production")
client = anthropic.Anthropic()
@app.route("/chat/history", methods=["POST"])
def chat_with_history():
    """Continue the conversation stored in the session with a new message.

    Expects a JSON body of the form ``{"message": "<text>"}``. The running
    message list is kept under ``session["history"]``.

    Returns:
        ``{"reply": "<text>", "turns": <int>}`` on success, where ``turns`` is
        the number of completed user/assistant exchanges. ``{"error": "..."}``
        with status 400 when the body is not a JSON object carrying a
        non-empty string ``message``, or with status 502 when the Claude API
        call fails.

    NOTE: Flask's default session is stored in a signed browser cookie, so
    the history is bounded by the browser's cookie size limit. Use a
    server-side session store for long conversations.
    """
    payload = request.get_json(silent=True)
    user_message = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({"error": "'message' must be a non-empty string"}), 400

    # Build the next history on a copy so a failed API call cannot leave an
    # unanswered user turn behind in the session.
    history = list(session.get("history", []))
    history.append({"role": "user", "content": user_message})

    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=history,
        )
    except anthropic.APIError:
        app.logger.exception("Claude API request failed")
        return jsonify({"error": "Upstream Claude API request failed"}), 502

    # Join every text block rather than indexing content[0], which raises
    # IndexError when the response carries no content blocks.
    assistant_reply = "".join(
        block.text for block in response.content if block.type == "text"
    )
    history.append({"role": "assistant", "content": assistant_reply})

    # Reassigning the key (rather than mutating the stored list in place)
    # marks the session as modified, so Flask persists it.
    session["history"] = history
    return jsonify({"reply": assistant_reply, "turns": len(history) // 2})
@app.route("/chat/reset", methods=["POST"])
def reset():
    """Forget the conversation stored in the session and acknowledge."""
    # The default makes this a no-op when no history has been stored yet.
    session.pop("history", None)
    return jsonify(ok=True)
from flask_cors import CORS
# Browser origins that may call this API cross-origin.
allowed_origins = ["http://localhost:3000", "https://yourapp.com"]

app = Flask(__name__)
CORS(app, origins=allowed_origins)
| Criterion | Flask | FastAPI |
|---|---|---|
| Streaming | stream_with_context + generator | StreamingResponse built-in |
| Async support | Requires flask[async] / gevent | Native async/await (ASGI) |
| Request validation | Manual or marshmallow | Pydantic (automatic) |
| Ecosystem fit | Flask-Login, Flask-SQLAlchemy, Flask-Admin | Pydantic models, SQLModel |
| Deploy | Gunicorn (WSGI) | Uvicorn (ASGI) |
Estimate token costs for your expected traffic at the Claude API Cost Calculator. For async patterns see the FastAPI streaming guide and async Python patterns.