Build a multi-turn chat with Claude in Python: manage conversation history, control context length, and implement memory patterns with the Anthropic SDK.
Since the Claude API is stateless, building a chat requires storing and sending the full conversation history each turn.
import anthropic
# Shared SDK client used by every example below; constructed with no explicit arguments.
# NOTE(review): presumably credentials are picked up from the environment — confirm against the SDK docs.
client = anthropic.Anthropic()
def chat():
    """Run an interactive multi-turn chat with Claude on stdin/stdout.

    The full conversation history is kept in a local list and re-sent on
    every turn. The loop ends when the user types ``quit`` (case-insensitive)
    or when stdin reaches end-of-file.

    Blank input lines are skipped rather than sent as an empty user message.
    """
    messages = []
    print("Claude Chat — type 'quit' to exit\n")

    while True:
        try:
            user_input = input("You: ").strip()
        except EOFError:
            # Closed stdin (Ctrl-D / piped input exhausted) ends the session
            # the same way 'quit' does instead of surfacing a traceback.
            break

        if user_input.lower() == "quit":
            break
        if not user_input:
            # Nothing to send; prompt again without touching the history.
            continue

        messages.append({"role": "user", "content": user_input})

        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            system="You are a helpful assistant. Be concise.",
            messages=messages,
        )

        assistant_reply = response.content[0].text
        # Store the reply so the next request carries the whole conversation.
        messages.append({"role": "assistant", "content": assistant_reply})
        print(f"\nClaude: {assistant_reply}\n")
# Start the interactive loop right away; this call blocks until the user types 'quit'.
chat()
MAX_CONTEXT_TOKENS = 150_000


def trim_history(messages: list, max_tokens: int = MAX_CONTEXT_TOKENS) -> list:
    """Drop the oldest turns until the estimated token count is under budget.

    The first message is always kept for context, and the newest message is
    never dropped. Messages are removed two at a time, starting right after
    the first message, so the role alternation of the history is preserved.

    Args:
        messages: Conversation history; each item is a mapping with a
            ``"content"`` entry. The input list is not modified.
        max_tokens: Budget for the estimated token count.

    Returns:
        ``messages`` itself when nothing had to be removed, otherwise a new
        list holding the first message followed by the surviving tail. The
        result may still exceed the budget once only three or fewer messages
        remain, because nothing more can be removed safely.
    """
    # Rough estimate: 1 token ≈ 4 chars
    sizes = [len(str(m["content"])) for m in messages]
    total_chars = sum(sizes)

    # Index of the first message (after messages[0]) that is still kept.
    start = 1
    # Require more than three remaining messages before dropping a pair, so
    # the most recent message always survives the trim.
    while len(messages) - start > 2 and total_chars / 4 >= max_tokens:
        total_chars -= sizes[start] + sizes[start + 1]
        start += 2

    if start == 1:
        return messages
    return [messages[0], *messages[start:]]
# Module-level conversation history shared by chat_with_trimming and chat_with_rag.
messages = []
# Running total of input + output tokens reported by the API across chat_with_trimming calls.
cumulative_tokens = 0
def chat_with_trimming(user_input: str) -> str:
    """Send one user turn with a trimmed history and return Claude's reply.

    The module-level ``messages`` history is extended with the new user turn,
    trimmed with ``trim_history``, and sent to the API. On success the
    assistant reply is appended and ``cumulative_tokens`` is increased by the
    input and output token counts reported on the response.

    Args:
        user_input: Text of the user's message.

    Returns:
        The text of the first content block of the response.
    """
    global messages, cumulative_tokens

    # Build the request on a candidate list so that a failed API call does
    # not leave an unanswered user turn in the shared history.
    candidate = trim_history(messages + [{"role": "user", "content": user_input}])

    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=candidate,
    )

    reply = response.content[0].text
    candidate.append({"role": "assistant", "content": reply})

    # Commit only after the request succeeded; update in place so the shared
    # list object stays the same.
    messages[:] = candidate
    cumulative_tokens += response.usage.input_tokens + response.usage.output_tokens
    return reply
def chat_with_rag(user_input: str, context_docs: list[str]) -> str:
    """Answer a question with retrieved documents prepended to the user turn.

    The documents are joined with newlines and placed ahead of the question in
    a single user message. That augmented message and the assistant reply are
    both appended to the module-level ``messages`` history.

    Args:
        user_input: The user's question.
        context_docs: Text snippets to include as context.

    Returns:
        The text of the first content block of the response.
    """
    context = "\n".join(context_docs)
    augmented_message = f"Context:\n{context}\nQuestion: {user_input}"
    user_turn = {"role": "user", "content": augmented_message}

    # Send history + new turn without committing it yet, so a failed request
    # does not leave an unanswered user message in the shared history.
    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=messages + [user_turn],
    )

    reply = response.content[0].text
    messages.extend([user_turn, {"role": "assistant", "content": reply}])
    return reply
For prompt caching on repeated system prompts, see the prompt caching Python example. For the Node.js equivalent, see the Node.js quickstart.