Step-by-step guide to building a Python chatbot with the Claude API. Terminal chatbot, Flask web chatbot, streaming, multi-persona, and context window management — all with working code.
Building a chatbot with the Claude API requires two things: maintaining conversation history client-side, and choosing the right context-management strategy. This guide walks through four practical patterns from terminal prototype to production Flask web chatbot.
import anthropic
# Shared API client for the terminal chatbot. No key is passed here, so
# credentials must come from the SDK's default configuration (presumably the
# ANTHROPIC_API_KEY environment variable -- confirm against the SDK docs).
client = anthropic.Anthropic()
# Running transcript: chat() appends one {"role", "content"} dict per user
# turn and one per assistant turn, and sends the list with every request.
history = []
# System prompt passed as `system=` on every request made by chat().
SYSTEM = "You are a helpful assistant."
def chat(user_message: str) -> str:
    """Send one user message to Claude and return the assistant's reply.

    The module-level ``history`` list is the conversation memory: the user
    turn is appended before the request and the assistant turn after it, so
    every call sends the whole transcript.

    Args:
        user_message: Text typed by the user.

    Returns:
        The text of the first content block of the response.

    Raises:
        Exception: Anything raised by ``client.messages.create`` or by
            reading the reply is re-raised unchanged. In that case the user
            turn is removed again, so ``history`` is left exactly as it was
            before the call.
    """
    user_turn = {"role": "user", "content": user_message}
    history.append(user_turn)
    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=SYSTEM,
            messages=history,
        )
        reply = response.content[0].text
    except Exception:
        # Without this rollback a failed request would leave an unanswered
        # user turn in the shared transcript, and the next call would send it
        # again together with the new message.
        if history and history[-1] is user_turn:
            history.pop()
        raise
    history.append({"role": "assistant", "content": reply})
    return reply
if __name__ == "__main__":
    # Minimal REPL: read a line, send it to chat(), print the reply.
    print("Chatbot ready. Type 'exit' to quit.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C end the session the same way "exit" does
            # instead of dumping a traceback on the user.
            print()
            break
        if user_input.lower() == "exit":
            break
        if not user_input:
            # A blank line carries nothing to answer; sending it would only
            # spend a request (and an empty message is expected to be
            # rejected by the API), so prompt again.
            continue
        print(f"Claude: {chat(user_input)}")
Claude Sonnet 4.6 has a 200K-token context window, but long histories increase latency and cost, so send only the last N turns:
MAX_TURNS = 20  # keep last 20 user/assistant pairs (40 messages)


def trim_history(history: list) -> list:
    """Return the most recent part of ``history`` that is safe to send.

    At most ``MAX_TURNS * 2`` messages are kept, and the returned window
    always opens on a user message. Callers typically trim *after* appending
    the new user turn, which makes the list length odd; a plain
    ``history[-max_messages:]`` slice would then start on an assistant reply
    whose question has been cut off.

    Args:
        history: Chronological list of ``{"role": ..., "content": ...}``
            message dicts. It is never modified.

    Returns:
        ``history`` itself when it already fits, otherwise a new, shorter
        list. If the window contains no user message at all, the plain
        slice is returned unchanged because there is nothing to align on.
    """
    # Each turn = 2 messages (user + assistant)
    max_messages = MAX_TURNS * 2
    if len(history) <= max_messages:
        return history

    window = history[-max_messages:]
    for start, message in enumerate(window):
        if message.get("role") == "user":
            # Drop any leading assistant reply orphaned by the cut.
            return window[start:]
    return window
def chat(user_message: str) -> str:
    """Send one user message to Claude using a bounded context window.

    The full transcript is kept in the module-level ``history`` list, but
    only the window returned by ``trim_history`` is sent with the request.

    Args:
        user_message: Text typed by the user.

    Returns:
        The text of the first content block of the response.

    Raises:
        Exception: Anything raised by the request or by reading the reply is
            re-raised unchanged. In that case the user turn is removed
            again, so ``history`` is left exactly as it was before the call.
    """
    user_turn = {"role": "user", "content": user_message}
    history.append(user_turn)
    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=SYSTEM,
            messages=trim_history(history),
        )
        reply = response.content[0].text
    except Exception:
        # Keep the stored transcript consistent: a failed request must not
        # leave an unanswered user turn behind.
        if history and history[-1] is user_turn:
            history.pop()
        raise
    history.append({"role": "assistant", "content": reply})
    return reply
import anthropic
# API client used by chat_stream(). No key is passed here, so credentials
# must come from the SDK's default configuration (presumably the
# ANTHROPIC_API_KEY environment variable -- confirm against the SDK docs).
client = anthropic.Anthropic()
def chat_stream(history: list, user_message: str, system: str = "You are a helpful assistant."):
    """Stream Claude's reply to stdout as it arrives and return the full text.

    Args:
        history: Caller-owned transcript of ``{"role", "content"}`` dicts.
            It is extended in place with the user turn and, once the stream
            has finished, with the assistant turn.
        user_message: Text typed by the user.
        system: System prompt sent with the request.

    Returns:
        The complete reply, i.e. every streamed text chunk joined together.

    Raises:
        Exception: Anything raised while opening or consuming the stream is
            re-raised unchanged. In that case the user turn is removed from
            ``history`` again; text already printed stays on the terminal.
    """
    user_turn = {"role": "user", "content": user_message}
    history.append(user_turn)
    chunks = []
    try:
        with client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=system,
            messages=history,
        ) as stream:
            for text in stream.text_stream:
                # flush=True so each chunk shows up immediately instead of
                # waiting for stdout's line buffer.
                print(text, end="", flush=True)
                chunks.append(text)
    except Exception:
        # A failed or interrupted stream must not leave an unanswered user
        # turn in the caller's transcript.
        if history and history[-1] is user_turn:
            history.pop()
        raise
    print()  # newline after streamed output
    full_reply = "".join(chunks)
    history.append({"role": "assistant", "content": full_reply})
    return full_reply
from flask import Flask, request, Response, stream_with_context
import anthropic, json
# Flask application object; the /chat route is registered on it.
app = Flask(__name__)
# API client used by the /chat route. No key is passed here, so credentials
# must come from the SDK's default configuration (presumably the
# ANTHROPIC_API_KEY environment variable -- confirm against the SDK docs).
client = anthropic.Anthropic()
@app.route("/chat", methods=["POST"])
def chat():
    """Stream a Claude reply to the browser as server-sent events.

    Request body (JSON object):
        message: Required, non-empty string with the new user message.
        history: Optional list of earlier ``{"role", "content"}`` messages.
            The server keeps no conversation state; the client sends the
            transcript with every request.
        system: Optional system prompt; defaults to a generic assistant.

    Returns:
        A ``text/event-stream`` response made of ``data: {"delta": "..."}``
        events followed by a final ``data: [DONE]`` event, or a 400 JSON
        error (``{"error": "..."}``) when the request body is malformed.
    """

    def bad_request(message):
        # Answer malformed input with a 400 instead of letting a
        # KeyError/AttributeError surface as a 500.
        return Response(
            json.dumps({"error": message}),
            status=400,
            mimetype="application/json",
        )

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return bad_request("Request body must be a JSON object.")

    user_message = data.get("message")
    if not isinstance(user_message, str) or not user_message.strip():
        return bad_request("'message' must be a non-empty string.")

    history = data.get("history", [])
    if not isinstance(history, list):
        return bad_request("'history' must be a list of messages.")

    # NOTE(review): both the system prompt and the history come straight from
    # the client and are forwarded as-is; pin or validate them server-side if
    # callers must not be able to rewrite the bot's instructions.
    system = data.get("system", "You are a helpful assistant.")
    messages = history + [{"role": "user", "content": user_message}]

    def generate():
        with client.messages.stream(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=system,
            messages=messages,
        ) as stream:
            for text in stream.text_stream:
                # Every SSE event has to end with a blank line ("\n\n");
                # without it the browser never dispatches the event.
                yield f"data: {json.dumps({'delta': text})}\n\n"
        yield "data: [DONE]\n\n"

    return Response(
        stream_with_context(generate()),
        mimetype="text/event-stream",
        headers={"X-Accel-Buffering": "no"},  # disable Nginx buffering
    )
if __name__ == "__main__":
    import os

    # Debug mode turns on the interactive Werkzeug debugger, which lets
    # anyone who can reach the port execute arbitrary code. Keep it off
    # unless it is requested explicitly with FLASK_DEBUG=1.
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1")
# Persona name -> system prompt. get_reply() looks the prompt up by the
# persona name stored in the session and passes it as `system=`.
PERSONAS = {
"support": "You are a friendly customer support agent for Acme Corp. Be concise and solution-focused. Escalate if you cannot resolve the issue.",
"coding": "You are an expert Python developer. Provide working code with explanations. Prefer standard library solutions.",
"tutor": "You are a patient math tutor. Explain concepts step-by-step using simple language. Use examples.",
}
# In-memory session store, keyed by session_id. Each value is a dict with a
# "history" list of message dicts and the "persona" name chosen when the
# session was created. Entries are added by get_reply() and never removed
# in the code shown here.
sessions = {}
def get_reply(session_id: str, user_message: str, persona: str = "support") -> str:
    """Answer ``user_message`` inside the session identified by ``session_id``.

    A session is created on first use and keeps the persona it was created
    with; the ``persona`` argument is ignored for sessions that already
    exist. Only the most recent messages (at most 40) are sent to the model.

    Args:
        session_id: Key into the module-level ``sessions`` store.
        user_message: Text sent by the user.
        persona: Key into ``PERSONAS``; only used when the session is new.

    Returns:
        The text of the first content block of the response.

    Raises:
        KeyError: If a new session is requested with an unknown persona
            (raised before any state is created), or if a stored session
            refers to a persona that is not in ``PERSONAS``.
        Exception: Anything raised by the request or by reading the reply is
            re-raised unchanged; the user turn is removed from the session
            history again in that case.
    """
    session = sessions.get(session_id)
    if session is None:
        if persona not in PERSONAS:
            # Fail before the session exists: otherwise a typo would create
            # a session that raises KeyError on every later call.
            raise KeyError(persona)
        session = sessions[session_id] = {"history": [], "persona": persona}

    transcript = session["history"]
    user_turn = {"role": "user", "content": user_message}
    transcript.append(user_turn)

    # The transcript now ends with the pending user turn, so its length is
    # odd and a plain [-40:] slice would open on an assistant reply once the
    # session is long enough. Advance to the first user message instead; the
    # turn just appended guarantees there is one.
    window = transcript[-40:]
    first_user = next(
        (i for i, message in enumerate(window) if message["role"] == "user"),
        0,
    )
    window = window[first_user:]

    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system=PERSONAS[session["persona"]],
            messages=window,
        )
        reply = response.content[0].text
    except Exception:
        # A failed request must not leave an unanswered user turn in the
        # stored session history.
        if transcript and transcript[-1] is user_turn:
            transcript.pop()
        raise
    transcript.append({"role": "assistant", "content": reply})
    return reply
| Model | Cost (input/output per 1M tokens) | Best for |
|---|---|---|
| claude-haiku-4-5 | $0.80 / $4 | FAQ bots, simple customer service, high-volume chat |
| claude-sonnet-4-6 | $3 / $15 | General-purpose chatbots, coding assistants, support escalation |
| claude-opus-4-7 | $15 / $75 | Complex reasoning, legal/medical Q&A, multi-step agent tasks |
To estimate chatbot costs before launch, use the Claude API Cost Calculator. For multi-turn conversation patterns and token counting, see the conversation history guide.