Python code to calculate and compare API costs across Claude, GPT-4o, and Gemini for any prompt or workload. Includes a free interactive tool.
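Picking the cheapest LLM API for your workload requires comparing token prices and factoring in prompt caching, because cached input tokens are billed at a lower rate than fresh ones. The Python module below handles the math for the Claude, GPT-4o, and Gemini models listed in the table.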
| Model | Input $/M | Output $/M | Cached read $/M |
|---|---|---|---|
| Claude Haiku 4.5 | $0.80 | $4.00 | $0.08 |
| Claude Sonnet 4.6 | $3.00 | $15.00 | $0.30 |
| Claude Opus 4.7 | $15.00 | $75.00 | $1.50 |
| GPT-4o | $2.50 | $10.00 | $1.25 |
| GPT-4o-mini | $0.15 | $0.60 | $0.075 |
| Gemini 2.0 Flash | $0.10 | $0.40 | — |
| Gemini 1.5 Pro | $1.25 | $5.00 | — |
from dataclasses import dataclass
from typing import Optional
@dataclass
class ModelPricing:
    """Token prices for one model, in US dollars per million tokens."""

    # Display name of the model.
    name: str
    # $ per million input tokens.
    input_per_m: float
    # $ per million output tokens.
    output_per_m: float
    # $ per million cached-read tokens; None = no caching.
    cache_read_per_m: Optional[float] = None
# Pricing table keyed by a short model identifier. All prices are in US
# dollars per million tokens; entries that omit cache_read_per_m have no
# cached-read price (it defaults to None).
MODELS = {
    "claude-haiku": ModelPricing(
        name="Claude Haiku 4.5",
        input_per_m=0.80,
        output_per_m=4.00,
        cache_read_per_m=0.08,
    ),
    "claude-sonnet": ModelPricing(
        name="Claude Sonnet 4.6",
        input_per_m=3.00,
        output_per_m=15.00,
        cache_read_per_m=0.30,
    ),
    "claude-opus": ModelPricing(
        name="Claude Opus 4.7",
        input_per_m=15.00,
        output_per_m=75.00,
        cache_read_per_m=1.50,
    ),
    "gpt-4o": ModelPricing(
        name="GPT-4o",
        input_per_m=2.50,
        output_per_m=10.00,
        cache_read_per_m=1.25,
    ),
    "gpt-4o-mini": ModelPricing(
        name="GPT-4o-mini",
        input_per_m=0.15,
        output_per_m=0.60,
        cache_read_per_m=0.075,
    ),
    "gemini-flash": ModelPricing(
        name="Gemini 2.0 Flash",
        input_per_m=0.10,
        output_per_m=0.40,
    ),
    "gemini-pro": ModelPricing(
        name="Gemini 1.5 Pro",
        input_per_m=1.25,
        output_per_m=5.00,
    ),
}
def calculate_cost(
    model_key: str,
    input_tokens: int,
    output_tokens: int,
    cached_tokens: int = 0,
) -> dict:
    """Estimate the USD cost of a single API call for one model.

    Args:
        model_key: Key into MODELS (for example "claude-sonnet").
        input_tokens: Total input tokens for the call, including any tokens
            that were served from the prompt cache.
        output_tokens: Number of tokens generated by the model.
        cached_tokens: The portion of input_tokens read from cache. Ignored
            (billed at the normal input rate) for models that have no
            cached-read price.

    Returns:
        A dict with the model's display name under "model", the per-call cost
        under "total_cost_usd" (rounded to 6 decimals) and the cost of 1,000
        identical calls under "cost_per_1k_calls_usd" (rounded to 4 decimals).

    Raises:
        KeyError: If model_key is not present in MODELS.
        ValueError: If any token count is negative, or cached_tokens exceeds
            input_tokens.
    """
    p = MODELS[model_key]

    if input_tokens < 0 or output_tokens < 0 or cached_tokens < 0:
        raise ValueError("token counts must be non-negative")
    if cached_tokens > input_tokens:
        raise ValueError(
            "cached_tokens cannot exceed input_tokens "
            "(input_tokens is the total, cached_tokens a subset of it)"
        )

    # A model without a cached-read price gives no discount: every input
    # token is billed at the full input rate instead of silently becoming
    # free. Compare against None so an explicit price of 0.0 still counts
    # as "caching supported".
    if p.cache_read_per_m is None:
        billable_cached = 0
    else:
        billable_cached = cached_tokens

    fresh_input = input_tokens - billable_cached
    cost = (fresh_input / 1_000_000) * p.input_per_m
    cost += (output_tokens / 1_000_000) * p.output_per_m
    if billable_cached > 0:
        cost += (billable_cached / 1_000_000) * p.cache_read_per_m

    return {
        "model": p.name,
        "total_cost_usd": round(cost, 6),
        "cost_per_1k_calls_usd": round(cost * 1000, 4),
    }
# Price one typical chatbot turn on every model in the table:
# 8K system prompt (cached after first call), 200 user tokens, 500 output tokens.
for key in MODELS:
    result = calculate_cost(
        key,
        input_tokens=8200,
        output_tokens=500,
        cached_tokens=8000,
    )
    print(f"{result['model']:25s} ${result['total_cost_usd']:.6f}/call ${result['cost_per_1k_calls_usd']:.3f}/1K calls")
import anthropic

client = anthropic.Anthropic()
response = client.messages.create(
    model="claude-sonnet-4-6",
    max_tokens=512,
    system="You are a helpful assistant.",
    messages=[{"role": "user", "content": "What is prompt caching?"}],
)

# Real token usage from the response.
usage = response.usage
output_tok = usage.output_tokens
# The cache field may be absent or present-but-None; treat both as zero.
cache_read = getattr(usage, "cache_read_input_tokens", 0) or 0
# Anthropic reports input_tokens exclusive of cache reads, whereas
# calculate_cost subtracts cached_tokens from input_tokens. Pass the combined
# total so the uncached part is not under-counted.
# NOTE: cache-write tokens (cache_creation_input_tokens) are not priced here.
input_tok = usage.input_tokens + cache_read

cost = calculate_cost("claude-sonnet", input_tok, output_tok, cache_read)
print(f"This call cost: ${cost['total_cost_usd']:.6f}")
print(f"At 10K calls/day: ${cost['total_cost_usd'] * 10000:.2f}/day")
def monthly_cost_projection(
    model_key: str,
    calls_per_day: int,
    avg_input_tokens: int,
    avg_output_tokens: int,
    cached_tokens: int = 0,
    days_per_month: float = 30,
) -> dict:
    """Project daily and monthly spend from an average call profile.

    Args:
        model_key: Key into MODELS, forwarded to calculate_cost.
        calls_per_day: Number of API calls made per day.
        avg_input_tokens: Average input tokens per call (cached included).
        avg_output_tokens: Average output tokens per call.
        cached_tokens: Average input tokens per call served from cache.
        days_per_month: Billing days used for the monthly figure. Defaults to
            30, the value that was previously hard-coded.

    Returns:
        A dict with "daily_usd" and "monthly_usd", each rounded to 2 decimals.
    """
    single_call = calculate_cost(
        model_key, avg_input_tokens, avg_output_tokens, cached_tokens
    )
    # The per-call figure is already rounded to 6 decimals by calculate_cost,
    # so projections for very cheap calls inherit that rounding.
    daily = single_call["total_cost_usd"] * calls_per_day
    # Scale the unrounded daily value so rounding happens once per figure.
    monthly = daily * days_per_month
    return {"daily_usd": round(daily, 2), "monthly_usd": round(monthly, 2)}
# Budget-tier comparison for a chatbot workload:
# 5K calls/day, 10K system prompt (cached), 200 user tokens, 500 output tokens.
for key, label in (
    ("claude-haiku", "Haiku"),
    ("gpt-4o-mini", "GPT-4o-mini"),
    ("gemini-flash", "Gemini Flash"),
):
    p = monthly_cost_projection(
        key,
        calls_per_day=5000,
        avg_input_tokens=10200,
        avg_output_tokens=500,
        cached_tokens=10000,
    )
    print(f"{label:15s} ${p['daily_usd']:7.2f}/day ${p['monthly_usd']:8.2f}/month")
Want to paste a prompt and instantly compare costs across Claude, GPT-4o, and Gemini without writing code? Use the LLM Prompt Pricing Calculator — it counts tokens and shows the exact cost per provider in real time.