How to call the Claude API from an Express.js backend in 2026. Covers a minimal handler, streaming SSE to the browser, conversation history with sessions, and rate-limit middleware.
Express.js is the most widely used Node.js web framework. This guide shows how to wire Claude into an Express backend — from a minimal single-endpoint handler through streaming SSE, conversation history, and production-ready rate limiting.
npm install express @anthropic-ai/sdk express-session express-rate-limit
import express from "express";
import Anthropic from "@anthropic-ai/sdk";
// Anthropic SDK client shared by every route; constructed without options it
// reads ANTHROPIC_API_KEY from env.
const client = new Anthropic();

// The Express application all routes are registered on.
const app = express();

// Parse JSON request bodies so handlers can read req.body.
const parseJsonBody = express.json();
app.use(parseJsonBody);
/**
 * POST /api/chat — single-turn, non-streaming chat endpoint.
 *
 * Request body: `{ message: string }`.
 * Responds with `{ reply: string }` on success, or `{ error: string }` with
 * status 400 (invalid input) or 502 (the model request failed).
 */
app.post("/api/chat", async (req, res) => {
  const { message } = req.body ?? {};
  if (typeof message !== "string" || message.trim() === "") {
    return res.status(400).json({ error: "`message` must be a non-empty string." });
  }

  // Express 4 does not forward a rejected promise from an async handler to
  // its error handling, so an uncaught failure here would leave the request
  // hanging. Catch it and answer explicitly.
  try {
    const response = await client.messages.create({
      model: "claude-haiku-4-5-20251001",
      max_tokens: 1024,
      messages: [{ role: "user", content: message }],
    });

    // `content` is a list of blocks; keep only the text ones rather than
    // assuming the first block is text.
    const reply = response.content
      .filter((block) => block.type === "text")
      .map((block) => block.text)
      .join("");

    res.json({ reply });
  } catch (err) {
    console.error("Claude request failed:", err);
    res.status(502).json({ error: "Upstream model request failed." });
  }
});
// Listen on the port given by the PORT environment variable, falling back to
// 3000 when it is unset, empty, or not a number.
const port = Number(process.env.PORT) || 3000;
app.listen(port, () => console.log(`Listening on :${port}`));
/**
 * POST /api/chat/stream — streams the model's reply to the browser as
 * Server-Sent Events.
 *
 * Request body: `{ message: string }`.
 * Each text delta is sent as `data: {"text": "..."}`; the stream finishes with
 * `data: [DONE]`. If the model request fails mid-stream, a
 * `data: {"error": "..."}` event is sent instead of `[DONE]`.
 * Invalid input is rejected with a 400 JSON response before streaming starts.
 */
app.post("/api/chat/stream", async (req, res) => {
  const { message } = req.body ?? {};
  if (typeof message !== "string" || message.trim() === "") {
    return res.status(400).json({ error: "`message` must be a non-empty string." });
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("X-Accel-Buffering", "no"); // disable Nginx buffering

  const stream = client.messages.stream({
    model: "claude-haiku-4-5-20251001",
    max_tokens: 1024,
    messages: [{ role: "user", content: message }],
  });

  // If the browser disconnects before we finish, stop the upstream request so
  // no further tokens are generated for a reader that is gone.
  let clientGone = false;
  res.on("close", () => {
    if (!res.writableEnded) {
      clientGone = true;
      stream.abort();
    }
  });

  try {
    for await (const event of stream) {
      if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
        // An SSE event is terminated by a blank line, hence the double newline.
        res.write(`data: ${JSON.stringify({ text: event.delta.text })}\n\n`);
      }
    }
    res.write("data: [DONE]\n\n");
  } catch (err) {
    // The abort triggered by a disconnect also surfaces here; in that case
    // there is nobody left to notify.
    if (!clientGone) {
      console.error("Claude stream failed:", err);
      res.write(`data: ${JSON.stringify({ error: "Stream failed." })}\n\n`);
    }
  } finally {
    res.end();
  }
});
import session from "express-session";
// Cookie-based sessions used to keep per-visitor conversation history.
// No `store` is configured, so express-session's default in-memory store is
// used; its documentation says that store is not meant for production.
app.use(session({
  // Signs the session ID cookie; SESSION_SECRET must be set in the environment.
  secret: process.env.SESSION_SECRET,
  // Do not re-save sessions that were not modified during the request.
  resave: false,
  // Only persist a session (and set its cookie) once a handler has actually
  // written to it, instead of creating one for every visitor.
  saveUninitialized: false,
  cookie: {
    maxAge: 60 * 60 * 1000, // 1h session
    httpOnly: true, // keep the cookie out of reach of client-side scripts
    sameSite: "lax", // do not send the cookie on cross-site subrequests
  },
}));
/**
 * Returns the most recent window of a conversation to send to the model.
 *
 * Keeps at most `maxMessages` trailing messages and then drops any leading
 * non-user messages, so the window always opens on a "user" message.
 *
 * @param {{ role: string, content: string }[]} history - Full conversation, oldest first.
 * @param {number} [maxMessages=20] - Upper bound on messages kept (20 messages is about 10 turns).
 * @returns {{ role: string, content: string }[]} A new array; `history` is not modified.
 */
function recentHistory(history, maxMessages = 20) {
  const recent = history.slice(-maxMessages);
  const firstUser = recent.findIndex((entry) => entry.role === "user");
  return firstUser === -1 ? [] : recent.slice(firstUser);
}

/**
 * POST /api/chat/history — multi-turn chat that remembers the conversation in
 * the visitor's session.
 *
 * Request body: `{ message: string }`.
 * Responds with `{ reply: string, turns: number }` on success, or
 * `{ error: string }` with status 400 (invalid input) or 502 (the model
 * request failed).
 */
app.post("/api/chat/history", async (req, res) => {
  const { message } = req.body ?? {};
  if (typeof message !== "string" || message.trim() === "") {
    return res.status(400).json({ error: "`message` must be a non-empty string." });
  }

  const history = req.session.history ?? [];
  const userTurn = { role: "user", content: message };

  // Send only a recent window to stay within context. A plain slice(-20) of
  // a history that ends on a user message would begin on an assistant
  // message, so the window is realigned to start on a user message.
  const outgoing = recentHistory([...history, userTurn]);

  try {
    const response = await client.messages.create({
      model: "claude-haiku-4-5-20251001",
      max_tokens: 1024,
      messages: outgoing,
    });

    const reply = response.content
      .filter((block) => block.type === "text")
      .map((block) => block.text)
      .join("");

    // Record the exchange only after the call succeeded, so a failed request
    // does not leave an unanswered user message in the session.
    // NOTE: the stored history itself is not trimmed; only the window sent to
    // the model is capped.
    req.session.history = [...history, userTurn, { role: "assistant", content: reply }];
    res.json({ reply, turns: req.session.history.length / 2 });
  } catch (err) {
    console.error("Claude request failed:", err);
    res.status(502).json({ error: "Upstream model request failed." });
  }
});
// DELETE /api/chat/history — replaces this session's stored conversation with
// an empty list and acknowledges with { ok: true }.
app.delete("/api/chat/history", ({ session }, res) => {
  session.history = [];
  const acknowledgement = { ok: true };
  res.json(acknowledgement);
});
import rateLimit from "express-rate-limit";
// Length of the rate-limit window, shared by the limiter config and the
// retry-after fallback below.
const CHAT_WINDOW_MS = 60_000; // 1 minute

/**
 * Rate-limit middleware for the chat routes: at most 20 requests per IP per
 * minute. Requests over the limit get a 429 JSON response with
 * `retryAfter`, the number of seconds to wait before trying again.
 */
const chatLimiter = rateLimit({
  windowMs: CHAT_WINDOW_MS,
  max: 20, // 20 requests per IP per minute
  standardHeaders: true,
  legacyHeaders: false,
  handler: (req, res) => {
    // resetTime is the absolute moment the window resets, so subtract the
    // current time to get a wait duration. Fall back to a full window if the
    // reset time is not available.
    const resetTime = req.rateLimit?.resetTime;
    const msUntilReset = resetTime
      ? Math.max(0, new Date(resetTime).getTime() - Date.now())
      : CHAT_WINDOW_MS;

    res.status(429).json({
      error: "Too many requests. Please wait.",
      retryAfter: Math.ceil(msUntilReset / 1000),
    });
  },
});

// Usage: pass the limiter ahead of the route handler so it runs first. The
// handler body is elided here; if an unthrottled "/api/chat" route is
// registered earlier, that earlier route answers the request instead.
app.post("/api/chat", chatLimiter, async (req, res) => { /* ... */ });
| Approach | Best for | Streaming | Session support | Overhead |
|---|---|---|---|---|
| Express.js | Standalone REST API, microservice | SSE or WebSocket | express-session + Redis | Minimal |
| Next.js API routes | Full-stack React app | ReadableStream (App Router) | iron-session or Auth.js | Bundled with frontend |
| Node.js http | Ultra-minimal, no dependencies | Manual | Manual | Zero |
| Fastify | High-throughput APIs | SSE plugin | @fastify/session | Lower than Express |
Estimate API costs before going to production with the Claude API Cost Calculator. For the Next.js integration pattern (App Router + Server Components), see the Next.js example.