How to stream Claude API responses in Python using the Anthropic SDK. Print tokens as they arrive instead of waiting for the full response.
Streaming lets you display Claude's response token-by-token, improving perceived latency for long outputs like code generation or document drafts.
import anthropic
# Synchronous client built with no explicit arguments.
client = anthropic.Anthropic()

headset_prompt = (
    "Write a 200-word product description for a noise-cancelling headset."
)

# Open a streaming request and echo each piece of text as soon as it arrives.
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{"role": "user", "content": headset_prompt}],
) as stream:
    for chunk in stream.text_stream:
        # flush=True so partial output is shown right away instead of buffered.
        print(chunk, end="", flush=True)

# Terminate the streamed output with a newline.
print()
# Stream a response, then report token usage from the accumulated message.
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Summarize the history of Python."}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)

    # Once the text has been fully streamed, fetch the final message, which
    # carries the usage counts. Done inside the `with` block so the stream
    # object is still open when it is queried.
    final = stream.get_final_message()

# The leading "\n" must be an escape sequence: a raw line break inside a
# single-quoted f-string is a syntax error.
print(f"\nTokens: {final.usage.input_tokens} in / {final.usage.output_tokens} out")
import asyncio
import anthropic
# Async flavour of the client; its stream is consumed with `async with`.
client = anthropic.AsyncAnthropic()


async def main() -> None:
    """Stream a haiku and print each piece of text as it arrives."""
    haiku_prompt = "Write a haiku about async programming."
    async with client.messages.stream(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=[{"role": "user", "content": haiku_prompt}],
    ) as stream:
        async for chunk in stream.text_stream:
            # flush=True keeps the output appearing incrementally.
            print(chunk, end="", flush=True)


# Drive the coroutine to completion.
asyncio.run(main())
# Same streaming loop, with a system prompt passed alongside the user message.
# NOTE: this is the blocking `with`/`for` form, so `client` is expected to be a
# synchronous `anthropic.Anthropic()` instance here.
coding_system_prompt = (
    "You are an expert Python developer. Write clean, commented code."
)
coding_request = "Write a function that parses ISO 8601 dates."

with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=2048,
    system=coding_system_prompt,
    messages=[{"role": "user", "content": coding_request}],
) as stream:
    for chunk in stream.text_stream:
        print(chunk, end="", flush=True)
Use the Claude Cost Calculator to measure token costs for streaming sessions. For Node.js streaming, see the streaming Node.js example.