Fix Claude API timeout errors in Python. Set request timeouts, implement retry logic for network errors and 500s, and handle streaming timeouts correctly.
Network errors and server timeouts are the second most common production issue after rate limits. Here's how to configure timeouts and implement robust retry logic for the Claude API.
import httpx
import anthropic
# Client-wide defaults: every call made through this client inherits these
# limits unless the call supplies its own ``timeout``.
client = anthropic.Anthropic(
    timeout=httpx.Timeout(
        pool=5.0,     # max seconds to obtain a connection from the pool
        connect=5.0,  # max seconds to establish the TCP connection
        write=10.0,   # max seconds to send the request body
        read=120.0,   # max seconds to wait for a response chunk
    ),
)

# A single call can replace the client default; the override applies to this
# request only and leaves the client's configuration untouched.
response = client.messages.create(
    timeout=30,
    messages=[{"role": "user", "content": "Hello"}],
    max_tokens=256,
    model="claude-haiku-4-5-20251001",
)
import time
import random
import httpx
import anthropic
from anthropic import APIConnectionError, APIStatusError, RateLimitError
# The SDK's built-in retries are switched off so that call_with_retry is the
# only retry layer; otherwise the two would multiply attempts and delays.
client = anthropic.Anthropic(
    max_retries=0,  # handle retries manually
    timeout=httpx.Timeout(connect=5, read=90, write=10, pool=5),
)

# HTTP statuses that indicate a transient condition worth retrying:
#   408 request timeout, 409 conflict, 429 rate limited,
#   500/502/503/504 server or gateway failures (504 = gateway timeout),
#   529 overloaded.
# Everything else (400, 401, 403, 404, 413, 422, ...) is a permanent client
# error and is re-raised immediately.
RETRYABLE_STATUS = {408, 409, 429, 500, 502, 503, 504, 529}
def call_with_retry(messages, model="claude-haiku-4-5-20251001",
                    max_tokens=512, max_attempts=6):
    """Send a Messages API request, retrying transient failures with backoff.

    Retries connection-level errors, rate limits, and any status listed in
    ``RETRYABLE_STATUS``. The wait between attempts starts at about one
    second, doubles each time, carries up to one second of random jitter, and
    is capped at 60 seconds. When the error response carries a usable
    ``retry-after`` header, the wait is raised to at least that value.

    Args:
        messages: Conversation payload passed to ``client.messages.create``.
        model: Model identifier to call.
        max_tokens: Upper bound on generated tokens.
        max_attempts: Total number of attempts, including the first one.

    Returns:
        Whatever ``client.messages.create`` returns on the first success.

    Raises:
        APIConnectionError: The final attempt failed at the network level.
        RateLimitError: The final attempt was rate limited.
        APIStatusError: The status is not retryable, or the final attempt
            failed with a retryable status.
        RuntimeError: ``max_attempts`` is less than 1, so no attempt ran.
    """
    delay = 1.0
    for attempt in range(max_attempts):
        final_attempt = attempt == max_attempts - 1
        server_hint = None
        try:
            return client.messages.create(
                model=model, max_tokens=max_tokens, messages=messages
            )
        except APIConnectionError as e:
            # Network-level failure (DNS, TCP reset, timeout)
            if final_attempt:
                raise
            reason = f"Connection error: {e}"
        except RateLimitError as e:
            # Must be caught before APIStatusError, which would also match it.
            if final_attempt:
                raise
            reason = "Rate limited"
            server_hint = _retry_after_seconds(e)
        except APIStatusError as e:
            if e.status_code not in RETRYABLE_STATUS or final_attempt:
                raise
            reason = f"Server error {e.status_code}"
            server_hint = _retry_after_seconds(e)

        # Compute the real wait first so the log line reports the time that
        # is actually slept (backoff plus jitter, or the server's hint).
        wait = min(delay + random.uniform(0, 1), 60)
        if server_hint is not None:
            wait = max(wait, server_hint)
        print(f"{reason}. Retrying in {wait:.1f}s")
        time.sleep(wait)
        delay = min(delay * 2, 60)
    raise RuntimeError("Exhausted retries")


def _retry_after_seconds(exc):
    """Return the ``retry-after`` hint from an error response, or None.

    Only plain numeric values in the range (0, 60] seconds are honoured, so a
    missing, malformed, or very large header never blocks the caller longer
    than the normal backoff cap.
    """
    try:
        seconds = float(exc.response.headers.get("retry-after", ""))
    except (AttributeError, TypeError, ValueError):
        return None
    return seconds if 0 < seconds <= 60 else None
import anthropic
client = anthropic.Anthropic()

# While streaming, the read timeout is measured from one chunk to the next,
# so every chunk that arrives restarts the clock.
# Prefer streaming for any prompt that is likely to produce a long reply.
with client.messages.stream(
    messages=[{"role": "user", "content": "Write a detailed product spec."}],
    max_tokens=2048,
    model="claude-sonnet-4-6",
) as stream:
    # Echo each text fragment as soon as it arrives, without buffering.
    for text in stream.text_stream:
        print(text, end="", flush=True)

print()  # finish the streamed output with a newline
import asyncio
import random
import httpx
import anthropic
from anthropic import AsyncAnthropic, APIConnectionError, APIStatusError
async def safe_create(client, messages, model, max_tokens, max_attempts=5):
    """Await a Messages API request, retrying only transient failures.

    Connection-level errors (which carry no HTTP status) and responses with
    status 408, 409, 429, or any 5xx are retried with exponential backoff:
    the wait starts at about one second, doubles after each failure, carries
    up to one second of random jitter, and is capped at 60 seconds. Every
    other status (400, 401, 403, 404, 413, 422, ...) is a permanent client
    error and is re-raised immediately.

    Args:
        client: Async client exposing ``messages.create``.
        messages: Conversation payload for the request.
        model: Model identifier to call.
        max_tokens: Upper bound on generated tokens.
        max_attempts: Total number of attempts, including the first one.

    Returns:
        Whatever ``client.messages.create`` returns on the first success.

    Raises:
        APIConnectionError: The final attempt failed at the network level.
        APIStatusError: The status is not retryable, or the final attempt
            failed with a retryable status.
        RuntimeError: ``max_attempts`` is less than 1, so no attempt ran.
    """
    delay = 1.0
    for attempt in range(max_attempts):
        try:
            return await client.messages.create(
                model=model, max_tokens=max_tokens, messages=messages
            )
        except (APIConnectionError, APIStatusError) as e:
            # APIConnectionError has no status_code; treat it as transient.
            status = getattr(e, "status_code", None)
            transient = (
                status is None or status in {408, 409, 429} or status >= 500
            )
            if not transient or attempt == max_attempts - 1:
                raise
        wait = min(delay + random.uniform(0, 1), 60)
        await asyncio.sleep(wait)
        delay = min(delay * 2, 60)  # keep the base delay bounded as well
    # Reached only when the loop never ran; fail loudly instead of
    # returning None to a caller that expects a response object.
    raise RuntimeError("Exhausted retries")
async def main():
    """Run one demo request through ``safe_create`` and print the reply."""
    client = AsyncAnthropic(
        # safe_create owns the retry policy; leaving the SDK's automatic
        # retries on would multiply the number of attempts and the total wait.
        max_retries=0,
        timeout=httpx.Timeout(connect=5, read=90, write=10, pool=5),
    )
    try:
        result = await safe_create(
            client,
            [{"role": "user", "content": "Explain asyncio in Python."}],
            model="claude-haiku-4-5-20251001",
            max_tokens=256,
        )
        print(result.content[0].text)
    finally:
        # Release the HTTP connection pool even when the request fails.
        await client.close()


asyncio.run(main())
| Scenario | Recommended read timeout | Notes |
|---|---|---|
| Short Q&A (Haiku, ≤256 tokens) | 30s | Haiku generates ~100 tok/s; 256 tokens ≈ 2.5s typical |
| Structured output (Sonnet, ≤1K tokens) | 60s | Covers peak-load variance |
| Long-form generation (Sonnet/Opus, ≤4K tokens) | 120–180s | Use streaming instead when possible |
| Streaming (any length) | No increase needed* | *Read timeout applies to the gap between chunks, not to the full response |
For rate limit handling alongside timeouts, see the rate limits guide. Use the Claude API Cost Calculator to size max_tokens correctly and avoid unnecessarily long timeouts.