Use the Anthropic Batch API in Python to process thousands of requests at 50% off standard pricing. Poll for results and handle partial failures.
The Batch API processes requests asynchronously at half the standard price — ideal for dataset analysis, eval runs, and bulk content generation.
import anthropic
# API client, constructed with no explicit arguments.
client = anthropic.Anthropic()

# One batch entry per article: a unique custom_id (used later to match each
# result back to its input) plus the message parameters for that request.
requests = [
    dict(
        custom_id=f"req-{idx}",
        params=dict(
            model="claude-sonnet-4-6",
            max_tokens=512,
            messages=[dict(role="user", content=f"Summarize: {text}")],
        ),
    )
    for idx, text in enumerate(articles)  # your list of texts
]

# Submit every entry as a single batch and report its id and initial status.
batch = client.beta.messages.batches.create(requests=requests)
print(f"Batch ID: {batch.id} Status: {batch.processing_status}")
import time
def wait_for_batch(
    batch_id: str,
    poll_interval: int = 60,
    *,
    timeout: "float | None" = None,
) -> "anthropic.types.beta.messages.BetaMessageBatch":
    """Poll a message batch until its processing status is ``"ended"``.

    Each poll retrieves the batch through the module-level ``client``, prints
    the current status together with the succeeded/errored request counts,
    and returns the batch object as soon as ``processing_status`` equals
    ``"ended"``.

    Args:
        batch_id: Identifier of the batch to poll.
        poll_interval: Seconds to sleep between polls.
        timeout: Optional upper bound, in seconds, on the total time spent
            waiting. ``None`` (the default) waits indefinitely, which matches
            the original behaviour.

    Returns:
        The batch object from the poll that first reported ``"ended"``.

    Raises:
        TimeoutError: If ``timeout`` is set and elapses before the batch ends.
    """
    # The return annotation is a string so it is not evaluated when the
    # function is defined; the SDK exposes the batch type under
    # ``anthropic.types.beta.messages``.
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = None if timeout is None else time.monotonic() + timeout

    while True:
        batch = client.beta.messages.batches.retrieve(batch_id)
        print(f"Status: {batch.processing_status} "
              f"Succeeded: {batch.request_counts.succeeded} "
              f"Errored: {batch.request_counts.errored}")
        if batch.processing_status == "ended":
            return batch

        if deadline is None:
            time.sleep(poll_interval)
            continue

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(
                f"Batch {batch_id} did not end within {timeout} seconds "
                f"(last status: {batch.processing_status})"
            )
        # Never sleep past the deadline, so the final poll happens on time.
        time.sleep(min(poll_interval, remaining))
import json

# Block until the batch has finished processing.
completed = wait_for_batch(batch.id)

# custom_id -> summary text for requests that succeeded.
results = {}
# custom_id -> failure detail for every request that did not succeed.
errors = {}

for entry in client.beta.messages.batches.results(completed.id):
    outcome = entry.result
    if outcome.type == "succeeded":
        # Take the first text block rather than assuming content[0] is text:
        # an empty content list or a leading non-text block would otherwise
        # raise and abort processing of all remaining results.
        results[entry.custom_id] = next(
            (
                block.text
                for block in outcome.message.content
                if getattr(block, "type", None) == "text"
            ),
            "",
        )
    elif outcome.type == "errored":
        errors[entry.custom_id] = outcome.error
    else:
        # Any other result type (the Batch API documents "canceled" and
        # "expired") is also a request without output; record the type so
        # it is counted as a failure instead of silently disappearing.
        errors[entry.custom_id] = outcome.type

print(f"Got {len(results)} results, {len(errors)} errors")

# Save to file
with open("batch_results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
| Model | Standard input | Batch input | Savings |
|---|---|---|---|
| Claude Sonnet 4.6 | $3.00 / 1M | $1.50 / 1M | 50% |
| Claude Haiku 4.5 | $1.00 / 1M | $0.50 / 1M | 50% |
| Claude Opus 4.7 | $15.00 / 1M | $7.50 / 1M | 50% |
Use the Cost Calculator to estimate savings for your batch workload. For real-time processing where latency matters, use the standard API with prompt caching instead.