How to use Claude's extended thinking mode in Python. Working code for streaming and non-streaming extended thinking with token budgets and cost estimates.
Claude's extended thinking mode lets the model reason through hard problems before answering. This guide shows how to enable it in Python, interpret the response, and control costs.
import anthropic
client = anthropic.Anthropic()

# Non-streaming request: the full response (reasoning + answer) arrives at once.
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=16000,  # must be > budget_tokens
    thinking={
        "type": "enabled",
        "budget_tokens": 10000,  # how many tokens Claude can use for reasoning
    },
    messages=[{
        "role": "user",
        "content": "What is the most efficient algorithm for finding the k-th largest element in an unsorted array? Compare time and space complexity of at least three approaches.",
    }],
)

# Response has two content blocks: thinking + text
PREVIEW_CHARS = 500  # how much of the reasoning to show
for block in response.content:
    if block.type == "thinking":
        print("=== Claude's Reasoning ===")
        preview = block.thinking[:PREVIEW_CHARS]
        # Only mark the preview as truncated when something was actually cut off.
        if len(block.thinking) > PREVIEW_CHARS:
            print(preview, "...")
        else:
            print(preview)
    elif block.type == "text":
        print("\n=== Final Answer ===")
        print(block.text)

# Token usage
print(f"\nInput tokens: {response.usage.input_tokens}")
print(f"Output tokens (incl. thinking): {response.usage.output_tokens}")
# Streaming request: show progress dots while Claude reasons, then print the
# answer text incrementally as it is generated.
with client.messages.stream(
    model="claude-sonnet-4-6",
    max_tokens=16000,
    thinking={"type": "enabled", "budget_tokens": 8000},
    messages=[{"role": "user", "content": "Debug this recursive function and explain why it has exponential time complexity:\n\ndef fib(n):\n if n <= 1: return n\n return fib(n-1) + fib(n-2)"}],
) as stream:
    current_block_type = None
    for event in stream:
        event_type = getattr(event, "type", None)
        if event_type == "content_block_start":
            # A new content block begins: announce which phase we are in.
            current_block_type = event.content_block.type
            if current_block_type == "thinking":
                print("\n[Thinking...]", end="", flush=True)
            elif current_block_type == "text":
                print("\n\n[Answer]\n", end="", flush=True)
        elif event_type == "content_block_delta":
            # Dispatch on the delta's declared type rather than probing for
            # attributes, so other delta kinds are ignored explicitly.
            delta_type = event.delta.type
            if delta_type == "thinking_delta":
                print(".", end="", flush=True)  # progress dots for thinking
            elif delta_type == "text_delta":
                print(event.delta.text, end="", flush=True)

# Terminate the last streamed line so later output starts on a fresh line.
print()
# Thinking budget guidelines
# NOTE: the API rejects budget_tokens below 1024, so the smallest tier starts
# at that minimum rather than a round 1000.
BUDGETS = {
    "quick_reasoning": 1024,      # simple logic, short proofs
    "standard_reasoning": 5000,   # coding problems, analysis
    "deep_reasoning": 16000,      # complex math, research tasks
    "maximum": 32000,             # hardest problems, long-form planning
}

# Cost estimate per call (Claude Sonnet at $15/M output tokens)
SONNET_OUTPUT_USD_PER_MTOK = 15
for label, budget in BUDGETS.items():
    # Upper bound: assumes the whole budget is spent; thinking billed as output.
    thinking_cost = (budget / 1_000_000) * SONNET_OUTPUT_USD_PER_MTOK
    print(f"{label:25s} budget={budget:6d} max thinking cost=${thinking_cost:.4f}")
def deep_code_review(
    code: str,
    language: str = "python",
    *,
    budget_tokens: int = 8000,
    max_tokens: int = 16000,
) -> dict:
    """Ask Claude for a structured code review using extended thinking.

    Args:
        code: Source code to review; inserted verbatim into the prompt.
        language: Name of the code's language, used in the prompt wording.
        budget_tokens: Thinking budget for the request. Must be at least
            1024, the API minimum.
        max_tokens: Output cap for the request. Must be greater than
            ``budget_tokens``.

    Returns:
        A dict with keys ``"reasoning"`` (text of the first thinking block,
        or ``""``), ``"review"`` (text of the first text block, or ``""``),
        ``"tokens_used"`` (output tokens reported by the API) and
        ``"cost_usd"`` (output tokens priced at $15 per million; input
        tokens are not included).

    Raises:
        ValueError: If ``budget_tokens`` is below 1024 or ``max_tokens`` is
            not greater than ``budget_tokens``.
    """
    min_budget_tokens = 1024
    output_usd_per_mtok = 15  # Claude Sonnet output price used for the estimate

    # Fail fast on limits the API would reject, before spending a request.
    if budget_tokens < min_budget_tokens:
        raise ValueError(
            f"budget_tokens must be >= {min_budget_tokens}, got {budget_tokens}"
        )
    if max_tokens <= budget_tokens:
        raise ValueError(
            f"max_tokens ({max_tokens}) must be greater than "
            f"budget_tokens ({budget_tokens})"
        )

    prompt = (
        f"Perform a deep code review of this {language} code. Think through:\n"
        "- Time and space complexity\n"
        "- Edge cases and potential bugs\n"
        "- Security vulnerabilities\n"
        "- Refactoring opportunities\n"
        "Code:\n"
        f"{code}\n"
        "Provide a structured review with severity ratings."
    )

    response = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=max_tokens,
        thinking={"type": "enabled", "budget_tokens": budget_tokens},
        messages=[{"role": "user", "content": prompt}],
    )

    # Take the first block of each kind; fall back to "" when one is absent.
    thinking = next((b.thinking for b in response.content if b.type == "thinking"), "")
    answer = next((b.text for b in response.content if b.type == "text"), "")
    output_tokens = response.usage.output_tokens

    return {
        "reasoning": thinking,
        "review": answer,
        "tokens_used": output_tokens,
        "cost_usd": (output_tokens / 1_000_000) * output_usd_per_mtok,
    }
| Thinking budget | Max thinking cost, Claude Sonnet ($15/M output) | Max thinking cost, Claude Opus ($75/M output) | Best for |
|---|---|---|---|
| 1K tokens | ~$0.015 | ~$0.075 | Simple logic, fact checks |
| 5K tokens | ~$0.075 | ~$0.375 | Coding problems, analysis |
| 10K tokens | ~$0.15 | ~$0.75 | Complex debugging, math |
| 32K tokens | ~$0.48 | ~$2.40 | Research, strategic planning |
Calculate costs for your specific thinking workloads with the Claude API Cost Calculator.