Handle rate limits, overloads, and API errors from the Anthropic SDK in Python. Implement exponential backoff, retries, and graceful degradation.
The Anthropic SDK raises typed exceptions that map cleanly to retry strategies.
anthropic.APIError # base class
├── anthropic.APIConnectionError # network/timeout
├── anthropic.APIStatusError # HTTP 4xx / 5xx
│ ├── anthropic.BadRequestError # 400
│ ├── anthropic.AuthenticationError # 401
│ ├── anthropic.PermissionDeniedError # 403
│ ├── anthropic.NotFoundError # 404
│ ├── anthropic.RateLimitError # 429
│ ├── anthropic.InternalServerError # 500
│ └── anthropic.OverloadedError # 529
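└── anthropic.APITimeoutError # request timed out (in the SDK this is a subclass of APIConnectionError, so handlers for APIConnectionError also catch it)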
import anthropic
import time
# Module-level client shared by the example functions below; created with no explicit options.
client = anthropic.Anthropic()
def safe_call(prompt: str) -> str | None:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text sent as the content of the one user message.

    Returns:
        The ``text`` of the first content block of the response, or ``None``
        when the request failed with a rate limit (429), an overload (529),
        or a connection error. A short diagnostic is printed in each of
        those cases.

    Raises:
        anthropic.AuthenticationError: Re-raised unchanged; a bad API key
            cannot be fixed by trying again.
        anthropic.APIStatusError: Any other HTTP error status propagates to
            the caller.
    """
    try:
        response = client.messages.create(
            model="claude-sonnet-4-6",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text
    except anthropic.RateLimitError as e:
        # 429: you've exceeded your rate limit
        print(f"Rate limited. Retry after: {e.response.headers.get('retry-after', '60')}s")
        return None
    except anthropic.AuthenticationError:
        # 401: bad API key — don't retry
        raise
    except anthropic.APIStatusError as e:
        # 529: Anthropic servers are overloaded — transient.
        # Matched by status code rather than by a dedicated exception class so
        # this handler does not depend on that class being available as a
        # top-level attribute of the installed SDK version.
        if e.status_code != 529:
            raise
        print("API overloaded. Try again in a few seconds.")
        return None
    except anthropic.APIConnectionError as e:
        print(f"Network error: {e}")
        return None
import functools
import random
import time
def _retry_after_seconds(exc) -> float:
    """Return the ``retry-after`` response header of ``exc`` in seconds, or 0.0 if absent or non-numeric."""
    try:
        return max(0.0, float(exc.response.headers.get("retry-after", 0)))
    except (AttributeError, TypeError, ValueError):
        return 0.0


def with_retry(max_attempts: int = 4, base_delay: float = 1.0):
    """Build a decorator that retries transient Anthropic API status errors.

    The decorated callable is invoked up to ``max_attempts`` times. A failed
    attempt is retried only when it raised ``anthropic.APIStatusError`` with
    status 429 or any status >= 500 (which includes 529). Every other
    exception, including other 4xx status errors, propagates immediately.

    Args:
        max_attempts: Total number of calls allowed, including the first.
            Must be at least 1.
        base_delay: Base of the exponential backoff, in seconds. The wait
            before retry ``k`` (0-based) is ``base_delay * 2**k`` plus up to
            0.5 s of random jitter, or the response's ``retry-after`` value
            if that is larger.

    Returns:
        A decorator whose wrapper returns whatever the wrapped callable
        returns on its first successful attempt.

    Raises:
        ValueError: If ``max_attempts`` is less than 1. Without this check
            the wrapper would return ``None`` without ever calling the
            wrapped function.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be >= 1, got {max_attempts}")

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            for attempt in range(max_attempts):
                try:
                    return fn(*args, **kwargs)
                except anthropic.APIStatusError as e:
                    # Decide by status code: 429 and 5xx are transient,
                    # the remaining 4xx errors should not be retried.
                    retryable = e.status_code == 429 or e.status_code >= 500
                    if not retryable or attempt == max_attempts - 1:
                        raise
                    backoff = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                    # Never retry sooner than the server asked us to.
                    delay = max(backoff, _retry_after_seconds(e))
                    print(f"Attempt {attempt + 1} failed ({type(e).__name__}). Retrying in {delay:.1f}s...")
                    time.sleep(delay)

        return wrapper

    return decorator
@with_retry(max_attempts=4)
def call_api(prompt: str) -> str:
    """Send ``prompt`` as a single user message and return the first content block's text.

    The call is wrapped by ``with_retry``, configured here for at most four
    attempts.
    """
    conversation = [{"role": "user", "content": prompt}]
    reply = client.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=1024,
        messages=conversation,
    )
    first_block = reply.content[0]
    return first_block.text
# Example invocation: sends one request through the retry-wrapped call_api.
result = call_api("Summarize the history of Python.")
# SDK retries 2× by default on 529 and 5xx
# NOTE(review): the SDK README also lists connection errors, 408, 409 and 429
# as retried by default — confirm against the installed SDK version.
# NOTE(review): presumably these built-in retries happen inside each attempt
# made by with_retry, so the two retry budgets multiply — verify before raising both.
# This statement rebinds the module-level `client`, so calls to safe_call /
# call_api made after this point use the settings below.
client = anthropic.Anthropic(
max_retries=5, # increase for batch workloads
timeout=30.0 # default is 600s; set lower for interactive apps
)
For Batch API workloads that tolerate latency in exchange for 50% cost savings, see the Batch API Python example. For pricing context, see the Anthropic API pricing 2026 page.